Nothing
# Script for writing MassBank files
#testtest change
#' Load MassBank compound information lists
#'
#' Loads MassBank compound information lists (i.e. the lists which were created
#' in the first two steps of the MassBank \code{\link{mbWorkflow}} and
#' subsequently edited by hand.).
#'
#' \code{resetInfolists} clears the information lists, i.e. it creates a new
#' empty list in \code{mbdata_archive}. \code{loadInfolist} loads a single CSV
#' file, whereas \code{loadInfolists} loads a whole directory.
#'
#' @aliases loadInfolists loadInfolist resetInfolists
#' @usage loadInfolists(mb, path)
#'
#' loadInfolist(mb, fileName)
#'
#' resetInfolists(mb)
#' @param path Directory in which the namelists reside. All CSV files in this
#' directory will be loaded.
#' @param fileName A single namelist to be loaded.
#' @param mb The \code{mbWorkspace} to load/reset the lists in.
#' @return The new workspace with loaded/reset lists.
#' @author Michael Stravs
#' @examples
#'
#' #
#' \dontrun{mb <- resetInfolists(mb)
#' mb <- loadInfolist(mb, "my_csv_infolist.csv")}
#'
#' @export
loadInfolists <- function(mb, path)
{
archivefiles <- list.files(path, ".csv", full.names=TRUE)
for(afile in archivefiles)
mb <- loadInfolist(mb, afile)
return(mb)
}
# Load an "infolist". This loads a CSV file which should contain the entries
# edited and controlled by hand. All compound infos from fileName are added into the
# global mbdata_archive. Entries with a cpdID which was already present, are substituted
# by new entries from the fileName file.
#' @export
loadInfolist <- function(mb, fileName)
{
# Prime a new infolist if it doesn't exist
if(ncol(mb@mbdata_archive) == 0)
mb <- resetInfolists(mb)
mbdata_new <- read.csv(fileName, sep=",", stringsAsFactors=FALSE)
# Legacy check for loading the Uchem format files.
# Even if dbname_* are not used downstream of here, it's still good to keep them
# for debugging reasons.
n <- colnames(mbdata_new)
cols <- c("id","dbcas","dataused")
# Check if comma-separated or semicolon-separated
d <- setdiff(cols, n)
if(length(d)>0){
mbdata_new <- read.csv2(fileName, stringsAsFactors=FALSE)
n <- colnames(mbdata_new)
d2 <- setdiff(cols, n)
if(length(d2) > 0){
stop("Some columns are missing in the infolist.")
}
}
if("dbname_d" %in% colnames(mbdata_new))
{
colnames(mbdata_new)[[which(colnames(mbdata_new)=="dbname_d")]] <- "dbname"
# dbname_e will be dropped because of the select= in the subset below.
}
if("COMMENT.EAWAG_UCHEM_ID" %in% colnames(mbdata_new))
colnames(mbdata_new)[[which(colnames(mbdata_new)== "COMMENT.EAWAG_UCHEM_ID")]] <-
"COMMENT.ID"
# Clear from padding spaces and NAs
mbdata_new <- as.data.frame(x = t(apply(mbdata_new, 1, function(r)
{
# Substitute empty spaces by real NA values
r[which(r == "")] <- NA
# Trim spaces (in all non-NA fields)
r[which(!is.na(r))] <- sub(pattern = "^ *([^ ]+) *$", replacement = "\\1", x = r[which(!is.na(r))])
return(r)
})), stringsAsFactors = FALSE)
# use only the columns present in mbdata_archive, no other columns added in excel
colNames <- colnames(mb@mbdata_archive)
commentColNames <- colnames(mbdata_new)[grepl(x = colnames(mbdata_new), pattern = "^COMMENT\\.(?!CONFIDENCE)(?!ID)", perl = TRUE)]
colNames <- c(colNames, commentColNames)
## The read infolists might not have all required / expected columns
missingColNames <- colNames[! colNames %in% colnames(mbdata_new)]
if (length(missingColNames >0)) {
missingCols <- matrix(NA, ncol=length(missingColNames))
colnames(missingCols) <- missingColNames
mbdata_new <- cbind(mbdata_new, missingCols)
}
mbdata_new <- mbdata_new[, colNames]
# substitute the old entires with the ones from our files
# then find the new (previously inexistent) entries, and rbind them to the table
new_entries <- setdiff(mbdata_new$id, mb@mbdata_archive$id)
old_entries <- intersect(mbdata_new$id, mb@mbdata_archive$id)
for(colname in colnames(mb@mbdata_archive))
mb@mbdata_archive[, colname] <- as.character(mb@mbdata_archive[, colname])
for(entry in old_entries)
mb@mbdata_archive[mb@mbdata_archive$id == entry,] <- mbdata_new[mbdata_new$id == entry,]
mb@mbdata_archive <- rbind(mb@mbdata_archive, mbdata_new[mbdata_new$id==new_entries,])
for(colname in colnames(mb@mbdata_archive))
mb@mbdata_archive[, colname] <- as.factor(mb@mbdata_archive[, colname])
return(mb)
}
# Resets the mbdata_archive to an empty version.
#' @export
resetInfolists <- function(mb)
{
mb@mbdata_archive <-
structure(list(X = integer(0), id = integer(0), dbcas = character(0),
dbname = character(0), dataused = character(0), COMMENT.CONFIDENCE = character(0),
COMMENT.ID = integer(0), CH.NAME1 = character(0),
CH.NAME2 = character(0), CH.NAME3 = character(0), CH.NAME4 = character(0), CH.NAME5 = character(0), CH.COMPOUND_CLASS = character(0),
CH.FORMULA = character(0), CH.EXACT_MASS = numeric(0), CH.SMILES = character(0),
CH.IUPAC = character(0), CH.LINK.CAS = character(0), CH.LINK.CHEBI = integer(0),
CH.LINK.HMDB = character(0), CH.LINK.KEGG = character(0), CH.LINK.LIPIDMAPS = character(0),
CH.LINK.PUBCHEM = character(0), CH.LINK.INCHIKEY = character(0),
CH.LINK.CHEMSPIDER = integer(0), CH.LINK.COMPTOX = character(0)), .Names = c("X", "id", "dbcas",
"dbname", "dataused", "COMMENT.CONFIDENCE", "COMMENT.ID",
"CH.NAME1", "CH.NAME2", "CH.NAME3", "CH.NAME4", "CH.NAME5", "CH.COMPOUND_CLASS", "CH.FORMULA",
"CH.EXACT_MASS", "CH.SMILES", "CH.IUPAC", "CH.LINK.CAS", "CH.LINK.CHEBI",
"CH.LINK.HMDB", "CH.LINK.KEGG", "CH.LINK.LIPIDMAPS", "CH.LINK.PUBCHEM",
"CH.LINK.INCHIKEY", "CH.LINK.CHEMSPIDER", "CH.LINK.COMPTOX"), row.names = integer(0), class = "data.frame")
if(getOption("RMassBank")$include_sp_tags)
{
mb@mbdata_archive["SP.SAMPLE"] <- character(0)
}
return(mb)
}
# The workflow function, i.e. (almost) the only thing you actually need to call.
# See below for explanation of steps.
#' MassBank record creation workflow
#'
#' Uses data generated by \code{\link{msmsWorkflow}} to create MassBank records.
#'
#' See the vignette \code{vignette("RMassBank")} for detailed informations about the usage.
#'
#' Steps:
#'
#' Step 1: Find which compounds don't have annotation information yet. For these
#' compounds, pull information from several databases (using gatherData).
#'
#' Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
#' Otherwise, continue.
#'
#' Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
#'
#' Step 4: Compile the spectra. Using the skeletons from the archive data, create
#' MassBank records per compound and fill them with peak data for each spectrum.
#' Also, assign accession numbers based on scan mode and relative scan no.
#'
#' Step 5: Convert the internal tree-like representation of the MassBank data into
#' flat-text string arrays (basically, into text-file style, but still in memory)
#'
#' Step 6: For all OK records, generate a corresponding molfile with the structure
#' of the compound, based on the SMILES entry from the MassBank record. (This molfile
#' is still in memory only, not yet a physical file)
#'
#' Step 7: If necessary, generate the appropriate subdirectories, and actually write
#' the files to disk.
#'
#' Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
#' to attribute substances to their corresponding structure molfiles.
#'
#' @param steps Which steps in the workflow to perform.
#' @param infolist_path A path where to store newly downloaded compound informations,
#' which should then be manually inspected.
#' @param mb The \code{mbWorkspace} to work in.
#' @param gatherData A variable denoting whether to retrieve information using several online databases \code{gatherData= "online"}
#' or to use the local babel installation \code{gatherData= "babel"}. Note that babel is used either way, if a directory is given
#' in the settings. This setting will be ignored if retrieval is set to "standard"
#' @param filter If \code{TRUE}, the peaks will be filtered according to the standard processing workflow in RMassBank -
#' only the best formula for a peak is retained, and only peaks passing multiplicity filtering are retained. If FALSE, it is assumed
#' that the user has already done filtering, and all peaks in the spectrum should be printed in the record (with or without formula.)
#' @return The processed \code{mbWorkspace}.
#' @seealso \code{\link{mbWorkspace-class}}
#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
#' @examples \dontrun{
#' mb <- newMbWorkspace(w) # w being a msmsWorkspace
#' mb <- loadInfolists(mb, "D:/myInfolistPath")
#' mb <- mbWorkflow(mb, steps=c(1:3), "newinfos.csv")
#'
#' }
#' @export
mbWorkflow <- function(mb, steps=c(1,2,3,4,5,6,7,8), infolist_path="./infolist.csv", gatherData = "online", filter = TRUE)
{
# Step 1: Find which compounds don't have annotation information yet. For these
# compounds, pull information from CTS (using gatherData).
if(1 %in% steps)
{
mbdata_ids <- lapply(selectSpectra(mb@spectra, "found", "object"), function(spec) spec@id)
message("mbWorkflow: Step 1. Gather info from several databases")
# Which IDs are not in mbdata_archive yet?
new_ids <- setdiff(as.numeric(unlist(mbdata_ids)), mb@mbdata_archive$id)
mb@mbdata <- lapply(new_ids, function(id)
{
if(findLevel(id,TRUE) == "standard"){
if(gatherData == "online"){
d <- gatherData(id)
}
if(gatherData == "babel"){
# message("mbWorkflow: Step 1. Gather info using babel")
d <- gatherDataBabel(id)
}
} else{
# message("mbWorkflow: Step 1. Gather no info - Unknown structure")
d <- gatherDataUnknown(id, mb@spectra[[1]]@mode, retrieval=findLevel(id,TRUE))
}
message(paste(id, ": ", d$dataused, sep=''))
return(d)
})
}
# Step 2: If new compounds were found, then export the infolist.csv and stop the workflow.
# Otherwise, continue!
if(2 %in% steps)
{
message("mbWorkflow: Step 2. Export infolist (if required)")
if(length(mb@mbdata)>0)
{
mbdata_mat <- flatten(mb@mbdata)
write.csv(as.data.frame(mbdata_mat),infolist_path, na="")
message(paste("The file", infolist_path, "was generated with new compound information. Please check and edit the table, and add it to your infolist folder."))
return(mb)
}
else
message("No new data added.")
}
# Step 3: Take the archive data (in table format) and reformat it to MassBank tree format.
if(3 %in% steps)
{
message("mbWorkflow: Step 3. Data reformatting")
mb@mbdata_relisted <- apply(mb@mbdata_archive, 1, readMbdata)
}
# Step 4: Compile the spectra! Using the skeletons from the archive data, create
# MassBank records per compound and fill them with peak data for each spectrum.
# Also, assign accession numbers based on scan mode and relative scan no.
if(4 %in% steps)
{
message("mbWorkflow: Step 4. Spectra compilation")
mb@compiled <- lapply(
selectSpectra(mb@spectra, "found", "object"),
function(r) {
message(paste("Compiling: ", r@name, sep=""))
mbdata <- mb@mbdata_relisted[[which(mb@mbdata_archive$id == as.numeric(r@id))]]
if(filter)
res <- buildRecord(r, mbdata=mbdata, additionalPeaks=mb@additionalPeaks, filter = filterOK & best)
else
res <- buildRecord(r, mbdata=mbdata, additionalPeaks=mb@additionalPeaks)
return(res)
})
# check which compounds have useful spectra
mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
#mb@ok <- which(!is.na(mb@compiled) & !(lapply(mb@compiled, length)==0))
mb@problems <- which(is.na(mb@compiled))
mb@compiled_ok <- mb@compiled[mb@ok]
mb@compiled_notOk <- mb@compiled[!mb@ok]
}
# Step 5: Convert the internal tree-like representation of the MassBank data into
# flat-text string arrays (basically, into text-file style, but still in memory)
if(5 %in% steps)
{
message("mbWorkflow: [Legacy Step 5. Flattening records] ignored")
#mb@mbfiles <- lapply(mb@compiled_ok, function(cpd) toMassbank(cpd, mb@additionalPeaks))
#mb@mbfiles_notOk <- lapply(mb@compiled_notOk, function(c) lapply(c, toMassbank))
}
# Step 6: For all OK records, generate a corresponding molfile with the structure
# of the compound, based on the SMILES entry from the MassBank record. (This molfile
# is still in memory only, not yet a physical file)
if(6 %in% steps)
{
if(RMassBank.env$export.molfiles){
message("mbWorkflow: Step 6. Generate molfiles")
mb@molfile <- lapply(mb@compiled_ok, function(c) createMolfile(as.numeric(c@id)))
} else
warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 6 is therefore ignored.")
}
# Step 7: If necessary, generate the appropriate subdirectories, and actually write
# the files to disk.
if(7 %in% steps)
{
message("mbWorkflow: Step 7. Generate subdirs and export")
## create folder
filePath_recData_valid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata")
filePath_recData_invalid <- file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata_invalid")
filePath_molData <- file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata")
if(!file.exists(filePath_recData_valid)) if(!dir.create(filePath_recData_valid,recursive=TRUE)) stop(paste("Could not create folder", filePath_recData_valid))
if(RMassBank.env$export.molfiles)
if(!file.exists(filePath_molData)) if(!dir.create(filePath_molData,recursive=TRUE)) stop(paste("Could not create folder", filePath_molData))
if(RMassBank.env$export.invalid & length(mb@mbfiles_notOk) > 0)
if(!file.exists(filePath_recData_invalid)) if(!dir.create(filePath_recData_invalid,recursive=TRUE)) stop(paste("Could not create folder", filePath_recData_invalid))
if(length(mb@molfile) == 0)
mb@molfile <- as.list(rep(x = NA, times = length(mb@compiled_ok)))
## export valid spectra
for(cnt in seq_along(mb@compiled_ok)){
exportMassbank_recdata(
mb@compiled_ok[[cnt]],
recDataFolder = filePath_recData_valid
)
if(RMassBank.env$export.molfiles)
exportMassbank_moldata(
mb@compiled_ok[[cnt]],
molfile = mb@molfile[[cnt]],
molDataFolder = filePath_molData
)
}
## export invalid spectra
for(cnt in seq_along(mb@compiled_notOk))
exportMassbank_recdata(
compiled = mb@mbfiles_notOk[[cnt]],
recDataFolder = filePath_recData_invalid
)
}
# Step 8: Create the list.tsv in the molfiles folder, which is required by MassBank
# to attribute substances to their corresponding structure molfiles.
if(8 %in% steps)
{
if(RMassBank.env$export.molfiles){
message("mbWorkflow: Step 8. Create list.tsv")
makeMollist(compiled = mb@compiled_ok)
} else
warning("RMassBank is configured not to export molfiles (RMassBank.env$export.molfiles). Step 8 is therefore ignored.")
}
return(mb)
}
# Calls openbabel and converts the SMILES code string (or retrieves the SMILES code from
# the ID, and then calls openbabel) to create a molfile in text format.
# If fileName is given, the file is directly stored. Otherwise, it is returned as a
# character array.
#' Create MOL file for a chemical structure
#'
#' Creates a MOL file (in memory or on disk) for a compound specified by the
#' compound ID or by a SMILES code.
#'
#' The function invokes OpenBabel (and therefore needs a correctly set
#' OpenBabel path in the RMassBank settings), using the SMILES code retrieved
#' with \code{findSmiles} or using the SMILES code directly. The current
#' implementation of the workflow uses the latter version, reading the SMILES
#' code directly from the MassBank record itself.
#'
#' @usage createMolfile(id_or_smiles, fileName = FALSE)
#' @param id_or_smiles The compound ID or a SMILES code.
#' @param fileName If the filename is set, the file is written directly to disk
#' using the specified filename. Otherwise, it is returned as a text array.
#' @return A character array containing the MOL/SDF format file, ready to be
#' written to disk.
#' @author Michael Stravs
#' @seealso \code{\link{findSmiles}}
#' @references OpenBabel: \url{http://openbabel.org}
#' @examples
#'
#' # Benzene:
#' \dontrun{
#' createMolfile("C1=CC=CC=C1")
#' }
#'
#' @export
createMolfile <- function(id_or_smiles, fileName = FALSE)
{
.checkMbSettings()
babeldir <- getOption("RMassBank")$babeldir
if(!is.numeric(id_or_smiles)){
smiles <- id_or_smiles
} else{
if(findLevel(id_or_smiles,TRUE) != "standard"){
return(c(" ","$$$$"))
}
smiles <- findSmiles(id_or_smiles)
}
# if no babeldir was set, get the result from cactus.
if(is.na(babeldir))
{
res <- getCactus(smiles, "sdf")
if(any(is.na(res))){
res <- getPcSDF(smiles)
}
if(any(is.na(res))){
stop("Pubchem and Cactus both seem to be down.")
}
if(is.character(fileName))
writeLines(res, fileName)
}
# otherwise use the better-tested OpenBabel toolkit.
else
{
if(!is.character(fileName))
cmd <- paste(babeldir, "babel -ismi -osdf -d -b --gen2D", sep='')
else
cmd <- paste(babeldir, "babel -ismi -osdf ", fileName , " -d -b --gen2D", sep='')
res <- system(cmd, intern=TRUE, input=smiles, ignore.stderr=TRUE)
# If we wrote to a file, read it back as return value.
if(is.character(fileName))
res <- readLines(fileName)
}
#return(c(" ","$$$$"))
return(res)
}
# Retrieve annotation data for a compound, from the internet service Pubchem
#' Retrieve supplemental annotation data from Pubchem
#'
#' Retrieves annotation data for a compound from the internet service Pubchem
#' based on the inchikey generated by babel or Cactus
#'
#' The data retrieved is the Pubchem CID, a synonym from the Pubchem database,
#' the IUPAC name (using the preferred if available) and a Chebi link
#'
#' @usage gatherPubChem(key)
#' @param key An Inchi-Key
#' @return Returns a list with 4 slots:
#' \code{PcID} The Pubchem CID
#' \code{Synonym} An arbitrary synonym for the compound name
#' \code{IUPAC} A IUPAC-name (preferred if available)
#' \code{Chebi} The identification number of the chebi database
#' @author Erik Mueller
#' @seealso \code{\link{mbWorkflow}}
#' @references Pubchem REST:
#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
#' Chebi:
#' \url{http://www.ebi.ac.uk/chebi}
#' @examples
#'
#' # Gather data for compound ID 131
#' \dontrun{gatherPubChem("QEIXBXXKTUNWDK-UHFFFAOYSA-N")}
#'
#' @export
gatherPubChem <- function(key){
PubChemData <- list()
##Trycatches are there because pubchem has connection issues 1 in 50 times.
##Write NA into the respective fields if something goes wrong with the conenction or the data.
##Retrieve Pubchem CID
tryCatch(
PubChemData$PcID <- getPcId(key),
error=function(e){
PubChemData$PcID <<- NA
})
##Retrieve a synonym to the name
tryCatch(
PubChemData$Synonym <- getPcSynonym(key),
error=function(e){
PubChemData$Synonym <<- NA
})
##Retrieve the IUPAC-name
tryCatch(
PubChemData$IUPAC <- getPcIUPAC(key),
error=function(e){
PubChemData$IUPAC <<- NA
})
##Retrieve the Chebi-ID
tryCatch(
PubChemData$Chebi <- getPcCHEBI(key),
error=function(e){
PubChemData$Chebi <<- NA
})
return(PubChemData)
}
# Retrieve annotation data for a compound, from the internet services Cactvs, Pubchem, Chemspider and CTS.
#' Retrieve annotation data
#'
#' Retrieves annotation data for a compound from the internet services CTS, Pubchem, Chemspider and
#' Cactvs, based on the SMILES code and name of the compounds stored in the
#' compound list.
#'
#' Composes the "upper part" of a MassBank record filled with chemical data
#' about the compound: name, exact mass, structure, CAS no., links to PubChem,
#' KEGG, ChemSpider. The instrument type is also written into this block (even
#' if not strictly part of the chemical information). Additionally, index
#' fields are added at the start of the record, which will be removed later:
#' \code{id, dbcas, dbname} from the compound list, \code{dataused} to indicate
#' the used identifier for CTS search (\code{smiles} or \code{dbname}).
#'
#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
#' inserted empty and will be filled later on.
#'
#' @usage gatherData(id)
#' @aliases gatherData
#' @param id The compound ID.
#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
#' @author Michael Stravs
#' @seealso \code{\link{mbWorkflow}}
#' @references Chemical Translation Service:
#' \url{http://uranus.fiehnlab.ucdavis.edu:8080/cts/homePage}
#' cactus Chemical Identifier Resolver:
#' \url{http://cactus.nci.nih.gov/chemical/structure}
#' MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' Pubchem REST:
#' \url{https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html}
#' Chemspider InChI conversion:
#' \url{https://www.chemspider.com/InChI.asmx}
#' @examples
#'
#' # Gather data for compound ID 131
#' \dontrun{gatherData(131)}
#'
#' @export
gatherData <- function(id)
{
##Preamble: Is a babeldir supplied?
##If yes, use it
.checkMbSettings()
usebabel=TRUE
babeldir <- getOption("RMassBank")$babeldir
if(is.na(babeldir)){
usebabel=FALSE
}
##Get all useful information from the local "database" (from the CSV sheet)
smiles <- findSmiles(id)
mass <- findMass(smiles)
dbcas <- findCAS(id)
dbname <- findName(id)
if(is.na(dbname)) dbname <- ""
if(is.na(dbcas)) dbcas <- ""
iupacName <- dbname
synonym <- dbname
formula <- findFormula(id)
##Convert SMILES to InChI key via Cactvs or babel. CTS doesn't "interpret" the SMILES per se,
##it just matches identical known SMILES, so we need to convert to a "searchable" and
##standardized format beforehand. Other databases are able to interpret the smiles.
if(usebabel){
cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
inchikey_split <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
} else{
inchikey <- getCactus(smiles, 'stdinchikey')
if(!is.na(inchikey)){
##Split the "InChiKey=" part off the key
inchikey_split <- strsplit(inchikey, "=", fixed=TRUE)[[1]][[2]]
} else{
inchikey_split <- getPcInchiKey(smiles)
}
}
##Use Pubchem to retrieve information
PcInfo <- gatherPubChem(inchikey_split)
if(!is.null(PcInfo$Synonym) & !is.na(PcInfo$Synonym)){
synonym <- PcInfo$Synonym
}
if(!is.null(PcInfo$IUPAC) & !is.na(PcInfo$IUPAC)){
iupacName <- PcInfo$IUPAC
}
##Get Chemspider-ID
csid <- getCSID(inchikey_split)
if(is.na(csid)){
##Get ChemSpider ID from Cactus if the Chemspider page is down
csid <- getCactus(inchikey_split, 'chemspider_id')
}
##Get CompTox
comptox <- getCompTox(inchikey_split)
if(is.null(comptox)){
comptox <- NA
}
##Use CTS to retrieve information
CTSinfo <- getCtsRecord(inchikey_split)
if((CTSinfo[1] == "Sorry, we couldn't find any matching results") || is.null(CTSinfo[1]))
{
CTSinfo <- NA
}
##List the names
if(iupacName == ""){
warning(paste0("Compound ID ",id,": no IUPAC name could be identified."))
}
if(toupper(dbname) == toupper(synonym)){
synonym <- dbname
}
if(toupper(dbname) == toupper(iupacName)){
iupacName <- dbname
}
if(toupper(synonym) == toupper(iupacName)){
synonym <- iupacName
}
names <- as.list(unique(c(dbname, synonym, iupacName)))
##If no name is found, it must be supplied in one way or another
if(all(sapply(names, function(x) x == ""))){
stop("RMassBank wasn't able to extract a usable name for this compound from any database. Please supply a name manually.")
}
# Start to fill the MassBank record.
# The top 4 entries will not go into the final record; they are used to identify
# the record and also to facilitate manual editing of the exported record table.
mbdata <- list()
mbdata[['id']] <- id
mbdata[['dbcas']] <- dbcas
mbdata[['dbname']] <- dbname
mbdata[['dataused']] <- "smiles"
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
# Confidence annotation and internal ID annotation.
# The ID of the compound will be written like:
# COMMENT: EAWAG_UCHEM_ID 1234
# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
mbdata[["COMMENT"]] <- list()
if(findLevel(id) == "0"){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
} else{
level <- findLevel(id)
if(level %in% c("1","1a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
}
if(level == c("2")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
}
if(level == c("2a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
}
if(level == c("2b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
}
if(level == c("3")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
}
if(level == c("3a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
}
if(level == c("3b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
}
if(level == c("3c")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
}
if(level == c("3d")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
}
if(level == c("4")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
}
if(level == c("5")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
}
}
mbdata[["COMMENT"]][["ID"]] = id
## add generic COMMENT information
rowIdx <- which(.listEnvEnv$listEnv$compoundList$ID == id)
properties <- colnames(.listEnvEnv$listEnv$compoundList)
properties2 <- gsub(x = properties, pattern = "^COMMENT ", replacement = "")
theseProperties <- grepl(x = properties, pattern = "^COMMENT ")
theseProperties <- theseProperties & (!(unlist(.listEnvEnv$listEnv$compoundList[rowIdx, ]) == "NA" | is.na(unlist(.listEnvEnv$listEnv$compoundList[rowIdx, ]))))
mbdata[["COMMENT"]][properties2[theseProperties]] <- unlist(.listEnvEnv$listEnv$compoundList[rowIdx, theseProperties])
# here compound info starts
mbdata[['CH$NAME']] <- names
# Currently we use a fixed value for Compound Class, since there is no useful
# convention of what should go there and what shouldn't, and the field is not used
# in search queries.
mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
mbdata[['CH$FORMULA']] <- formula
mbdata[['CH$EXACT_MASS']] <- mass
mbdata[['CH$SMILES']] <- smiles
if(usebabel){
cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
mbdata[['CH$IUPAC']] <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
} else{
mbdata[['CH$IUPAC']] <- getCactus(smiles, "stdinchi")
}
# Add all CH$LINK fields present in the compound datasets
link <- list()
# CAS
if(!is.na(CTSinfo[1])){
if("CAS" %in% CTS.externalIdTypes(CTSinfo))
{
# Prefer database CAS if it is also listed in the CTS results.
# otherwise take the shortest one.
cas <- CTS.externalIdSubset(CTSinfo,"CAS")
if(dbcas %in% cas)
link[["CAS"]] <- dbcas
else
link[["CAS"]] <- cas[[which.min(nchar(cas))]]
} else{
if(dbcas != ""){
link[["CAS"]] <- dbcas
}
}
} else{
if(dbcas != ""){
link[["CAS"]] <- dbcas
}
}
# CHEBI
if(is.na(PcInfo$Chebi[1])){
if(!is.na(CTSinfo[1])){
if("ChEBI" %in% CTS.externalIdTypes(CTSinfo))
{
# Cut off front "CHEBI:" if present
chebi <- CTS.externalIdSubset(CTSinfo,"ChEBI")
chebi <- chebi[[which.min(nchar(chebi))]]
chebi <- strsplit(chebi,":")[[1]]
link[["CHEBI"]] <- chebi[[length(chebi)]]
}
}
} else{
chebi <- PcInfo$Chebi
chebi <- chebi[[which.min(nchar(chebi))]]
chebi <- strsplit(chebi,":")[[1]]
link[["CHEBI"]] <- chebi[[length(chebi)]]
}
# HMDB
if(!is.na(CTSinfo[1])){
if("Human Metabolome Database" %in% CTS.externalIdTypes(CTSinfo))
link[["HMDB"]] <- CTS.externalIdSubset(CTSinfo,"HMDB")[[1]]
# KEGG
if("KEGG" %in% CTS.externalIdTypes(CTSinfo))
link[["KEGG"]] <- CTS.externalIdSubset(CTSinfo,"KEGG")[[1]]
# LipidMAPS
if("LipidMAPS" %in% CTS.externalIdTypes(CTSinfo))
link[["LIPIDMAPS"]] <- CTS.externalIdSubset(CTSinfo,"LipidMAPS")[[1]]
}
# PubChem CID
if(is.na(PcInfo$PcID[1])){
if(!is.na(CTSinfo[1])){
if("PubChem CID" %in% CTS.externalIdTypes(CTSinfo))
{
pc <- CTS.externalIdSubset(CTSinfo,"PubChem CID")
link[["PUBCHEM"]] <- paste0(min(pc))
}
}
} else{
link[["PUBCHEM"]] <- PcInfo$PcID[1]
}
if(!is.null(link[["PUBCHEM"]])){
if(substr(link[["PUBCHEM"]],1,4) != "CID:"){
link[["PUBCHEM"]] <- paste0("CID:", link[["PUBCHEM"]])
}
}
link[["INCHIKEY"]] <- inchikey_split
link[["COMPTOX"]] <- comptox
if(length(csid)>0) if(any(!is.na(csid))) link[["CHEMSPIDER"]] <- min(as.numeric(as.character(csid[!is.na(csid)])))
mbdata[['CH$LINK']] <- link
return(mbdata)
}
# Retrieve annotation data for a compound, using only babel
#' Retrieve annotation data
#'
#' Retrieves annotation data for a compound by using babel,
#' based on the SMILES code and name of the compounds stored in the
#' compound list.
#'
#' Composes the "upper part" of a MassBank record filled with chemical data
#' about the compound: name, exact mass, structure, CAS no..
#' The instrument type is also written into this block (even
#' if not strictly part of the chemical information). Additionally, index
#' fields are added at the start of the record, which will be removed later:
#' \code{id, dbcas, dbname} from the compound list.
#'
#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
#' inserted empty and will be filled later on.
#'
#' This function is an alternative to gatherData, in case CTS is down or if information
#' on one or more of the compounds in the compound list are sparse
#'
#' @usage gatherDataBabel(id)
#' @param id The compound ID.
#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
#' @author Michael Stravs, Erik Mueller
#' @seealso \code{\link{mbWorkflow}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#'
#' # Gather data for compound ID 131
#' \dontrun{gatherDataBabel(131)}
#'
#' @export
gatherDataBabel <- function(id){
.checkMbSettings()
babeldir <- getOption("RMassBank")$babeldir
smiles <- findSmiles(id)
# if no babeldir was set, throw an error that says that either CTS or babel have to be used
if(is.na(babeldir))
{
stop("No babeldir supplied; It is currently not possible to convert the information without either babel or CTS")
} else {
###Babel conversion
cmdinchikey <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchikey')
inchikey <- system(cmdinchikey, intern=TRUE, input=smiles, ignore.stderr=TRUE)
cmdinchi <- paste0(babeldir, 'obabel -:"',smiles,'" ', '-oinchi')
inchi <- system(cmdinchi, intern=TRUE, input=smiles, ignore.stderr=TRUE)
##Read from Compoundlist
smiles <- findSmiles(id)
mass <- findMass(smiles)
dbcas <- findCAS(id)
dbname <- findName(id)
if(is.na(dbname)) dbname <- ""
if(is.na(dbcas)) dbcas <- ""
formula <- findFormula(id)
##Create
mbdata <- list()
mbdata[['id']] <- id
mbdata[['dbcas']] <- dbcas
mbdata[['dbname']] <- dbname
mbdata[['dataused']] <- "smiles"
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
# Confidence annotation and internal ID annotation.
# The ID of the compound will be written like:
# COMMENT: EAWAG_UCHEM_ID 1234
# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
mbdata[["COMMENT"]] <- list()
if(findLevel(id) == "0"){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
} else{
level <- findLevel(id)
if(level %in% c("1","1a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
}
if(level == c("2")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
}
if(level == c("2a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
}
if(level == c("2b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
}
if(level == c("3")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
}
if(level == c("3a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
}
if(level == c("3b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
}
if(level == c("3c")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
}
if(level == c("3d")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
}
if(level == c("4")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
}
if(level == c("5")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
}
}
mbdata[["COMMENT"]][["ID"]] <- id
# here compound info starts
mbdata[['CH$NAME']] <- as.list(dbname)
# Currently we use a fixed value for Compound Class, since there is no useful
# convention of what should go there and what shouldn't, and the field is not used
# in search queries.
mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
mbdata[['CH$FORMULA']] <- formula
mbdata[['CH$EXACT_MASS']] <- mass
mbdata[['CH$SMILES']] <- smiles
mbdata[['CH$IUPAC']] <- inchi
link <- list()
if(dbcas != "")
link[["CAS"]] <- dbcas
link[["INCHIKEY"]] <- inchikey
mbdata[['CH$LINK']] <- link
}
return(mbdata)
}
# Retrieve annotation data for a compound, using only babel
#' Retrieve annotation data
#'
#' Retrieves annotation data for an unknown compound by using basic information present
#'
#' Composes the "upper part" of a MassBank record filled with chemical data
#' about the compound: name, exact mass, structure, CAS no..
#' The instrument type is also written into this block (even
#' if not strictly part of the chemical information). Additionally, index
#' fields are added at the start of the record, which will be removed later:
#' \code{id, dbcas, dbname} from the compound list.
#'
#' Additionally, the fields \code{ACCESSION} and \code{RECORD_TITLE} are
#' inserted empty and will be filled later on.
#'
#' This function is used to generate the data in case a substance is unknown,
#' i.e. not enough information is present to derive anything about formulas or links
#'
#' @usage gatherDataUnknown(id, mode, retrieval)
#' @param id The compound ID.
#' @param mode \code{"pH", "pNa", "pM", "pNH4", "mH", "mM", "mFA"} for different ions
#' ([M+H]+, [M+Na]+, [M]+, [M+NH4]+, [M-H]-, [M]-, [M+FA]-).
#' @param retrieval A value that determines whether the files should be handled either as "standard",
#' if the compoundlist is complete, "tentative", if at least a formula is present or "unknown"
#' if the only know thing is the m/z
#' @return Returns a list of type \code{list(id= \var{compoundID}, ...,
#' 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc. %% ...
#' @author Michael Stravs, Erik Mueller
#' @seealso \code{\link{mbWorkflow}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#'
#' # Gather data for compound ID 131
#' \dontrun{gatherDataUnknown(131,"pH")}
#'
#' @export
gatherDataUnknown <- function(id, mode, retrieval){
.checkMbSettings()
##Read from Compoundlist
smiles <- ""
if(retrieval == "unknown"){
mass <- findMass(id, "unknown", mode)
formula <- ""
}
if(retrieval == "tentative"){
mass <- findMass(id, "tentative", mode)
formula <- findFormula(id, "tentative")
}
dbcas <- NA
dbname <- findName(id)
if(is.na(dbname)) dbname <- paste("Unknown ID:",id)
if(is.na(dbcas)) dbcas <- ""
##Create
mbdata <- list()
mbdata[['id']] <- id
mbdata[['dbcas']] <- dbcas
mbdata[['dbname']] <- dbname
mbdata[['dataused']] <- "none"
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
# Confidence annotation and internal ID annotation.
# The ID of the compound will be written like:
# COMMENT: EAWAG_UCHEM_ID 1234
# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
mbdata[["COMMENT"]] <- list()
if(findLevel(id) == "0"){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- getOption("RMassBank")$annotations$confidence_comment
} else{
level <- findLevel(id)
if(level %in% c("1","1a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Reference Standard (Level 1)"
}
if(level == c("2")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure, tentative identification (Level 2)"
}
if(level == c("2a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via library match, tentative identification (Level 2a)"
}
if(level == c("2b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Probable structure via diagnostic evidence, tentative identification (Level 2b)"
}
if(level == c("3")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification only (Level 3)"
}
if(level == c("3a")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: most likely structure (Level 3)"
}
if(level == c("3b")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: isomers possible (Level 3)"
}
if(level == c("3c")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: substance class known (Level 3)"
}
if(level == c("3d")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: best match only (Level 3)"
}
if(level == c("4")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: molecular formula only (Level 4)"
}
if(level == c("5")){
mbdata[["COMMENT"]][["CONFIDENCE"]] <- "Tentative identification: structure and formula unknown (Level 5)"
}
}
mbdata[["COMMENT"]][["ID"]] <- id
# here compound info starts
mbdata[['CH$NAME']] <- as.list(dbname)
# Currently we use a fixed value for Compound Class, since there is no useful
# convention of what should go there and what shouldn't, and the field is not used
# in search queries.
mbdata[['CH$COMPOUND_CLASS']] <- getOption("RMassBank")$annotations$compound_class
mbdata[['CH$FORMULA']] <- formula
mbdata[['CH$EXACT_MASS']] <- mass
mbdata[['CH$SMILES']] <- ""
mbdata[['CH$IUPAC']] <- ""
link <- list()
mbdata[['CH$LINK']] <- link
return(mbdata)
}
# Flatten the internal tree-like representation of MassBank data to a flat table.
# Note that this limits us, in that the fields should be constant over all records!
# Therefore, e.g. the fixed number of 3 names which may be filled.
# If anybody has a cooler solution, I'll be happy to hear from you :)
#
# Note: the records from gatherData have additional information which is discarded, like
# author, copyright etc. They will be re-filled automatically when reading the file.
#' Flatten, or re-read, MassBank header blocks
#'
#' \code{flatten} converts a list of MassBank compound information sets (as
#' retrieved by \code{\link{gatherData}}) to a flat table, to be exported into
#' an \link[=loadInfolist]{infolist}. \code{readMbdata} reads a single record
#' from an infolist flat table back into a MassBank (half-)entry.
#'
#' Neither the flattening system itself nor the implementation are particularly
#' fantastic, but since hand-checking of records is a necessary evil, there is
#' currently no alternative (short of coding a complete GUI for this and
#' working directly on the records.)
#'
#' @aliases flatten readMbdata
#' @usage flatten(mbdata)
#'
#' readMbdata(row)
#' @param mbdata A list of MassBank compound information sets as returned from
#' \code{\link{gatherData}}.
#' @param row One row of MassBank compound information retrieved from an
#' infolist.
#' @return \code{flatten} returns a matrix (not a data frame) to be written to
#' CSV.
#'
#' \code{readMbdata} returns a list of type \code{list(id= \var{compoundID},
#' ..., 'ACCESSION' = '', 'RECORD_TITLE' = '', )} etc.
#' @author Michael Stravs
#' @seealso \code{\link{gatherData}},\code{\link{loadInfolist}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples \dontrun{
#' # Collect some data to flatten
#' ids <- c(40,50,60,70)
#' data <- lapply(ids, gatherData)
#' # Flatten the data trees to a table
#' flat.table <- flatten(data)
#' # reimport the table into a tree
#' data.reimported <- apply(flat.table, 1, readMbdata)
#' }
#'
#' @export
#'
flatten <- function(mbdata)
{
.checkMbSettings()
colNames <- names(unlist(mbdata[[1]]))
commentNames <- colNames[grepl(x = colNames, pattern = "^COMMENT\\.")]
colList <- c(
"id",
"dbcas",
"dbname",
"dataused",
commentNames,
#"COMMENT.CONFIDENCE",
# Note: The field name of the internal id field is replaced with the real name
# at "compilation" time. Therefore, functions DOWNSTREAM from compileRecord()
# must use the full name including the info from options("RMassBank").
#"COMMENT.ID",
"CH$NAME1",
"CH$NAME2",
"CH$NAME3",
"CH$NAME4",
"CH$NAME5",
"CH$COMPOUND_CLASS",
"CH$FORMULA",
"CH$EXACT_MASS",
"CH$SMILES",
"CH$IUPAC",
"CH$LINK.CAS",
"CH$LINK.CHEBI",
"CH$LINK.HMDB",
"CH$LINK.KEGG",
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX"
)
# make an empty data frame with the right length
rows <- length(mbdata)
cols <- length(colList)
mbframe <- matrix(data=NA, nrow=rows, ncol=cols)
colnames(mbframe) <- colList
#browser()
for(row in 1:rows)
{
# fill in all the data into the dataframe: all columns which
# a) exist in the target dataframe and b) exist in the (unlisted) MB record
# are written into the dataframe.
data <- unlist(mbdata[[row]])
# bugfix for the case of only one name
if(!("CH$NAME1" %in% names(data)))
data[["CH$NAME1"]] <- data[["CH$NAME"]]
datacols <- intersect(colList, names(data))
mbframe[row,datacols] <- data[datacols]
}
return(mbframe)
}
# Read data from a flat-table MassBank record row and feed it into a
# MassBank tree-like record. Also, prime the ACCESSION and RECORD_TITLE fields in the
# correct position in the record.
#' @export
readMbdata <- function(row)
{
.checkMbSettings()
# Listify the table row. Lists are just cooler to work with :)
row <- as.list(row)
mbdata <- list()
# Accession and title are added empty for now, to have them in the right place.
# Constants are read from the options or generated.
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
mbdata[['AUTHORS']] <- getOption("RMassBank")$annotations$authors
mbdata[['LICENSE']] <- getOption("RMassBank")$annotations$license
mbdata[['COPYRIGHT']] <- getOption("RMassBank")$annotations$copyright
if(getOption("RMassBank")$annotations$publication!="") {
mbdata[['PUBLICATION']] <- getOption("RMassBank")$annotations$publication
}
commentNames <- names(row)[grepl(x = names(row), pattern = "^COMMENT\\.")]
commentNames <- commentNames[!is.na(row[commentNames])]
# Read all determined fields from the file
# This is not very flexible, as you can see...
colList <- c(
commentNames,
#"COMMENT.CONFIDENCE",
#"COMMENT.ID",
"CH$NAME1",
"CH$NAME2",
"CH$NAME3",
"CH$NAME4",
"CH$NAME5",
"CH$COMPOUND_CLASS",
"CH$FORMULA",
"CH$EXACT_MASS",
"CH$SMILES",
"CH$IUPAC",
"CH$LINK.CAS",
"CH$LINK.CHEBI",
"CH$LINK.HMDB",
"CH$LINK.KEGG",
"CH$LINK.LIPIDMAPS",
"CH$LINK.PUBCHEM",
"CH$LINK.INCHIKEY",
"CH$LINK.CHEMSPIDER",
"CH$LINK.COMPTOX")
mbdata[["COMMENT"]] = list()
#mbdata[["COMMENT"]][["CONFIDENCE"]] <- row[["COMMENT.CONFIDENCE"]]
# Again, our ID field.
#mbdata[["COMMENT"]][["ID"]] <- row[["COMMENT.ID"]]
mbdata[["COMMENT"]][gsub(x = commentNames, pattern = "^COMMENT\\.", replacement = "")] <- row[commentNames]
names = c(row[["CH.NAME1"]], row[["CH.NAME2"]], row[["CH.NAME3"]], row[["CH.NAME4"]], row[["CH.NAME5"]])
names = names[which(!is.na(names))]
names <- gsub("'", "`", names)
mbdata[["CH$NAME"]] = names
mbdata[["CH$COMPOUND_CLASS"]] = row[["CH.COMPOUND_CLASS"]]
mbdata[["CH$FORMULA"]] = row[["CH.FORMULA"]]
mbdata[["CH$EXACT_MASS"]] = row[["CH.EXACT_MASS"]]
mbdata[["CH$SMILES"]] = row[["CH.SMILES"]]
mbdata[["CH$IUPAC"]] = row[["CH.IUPAC"]]
# Add all links and then eliminate the NA values from the tree.
link = list()
link[["CAS"]] = row[["CH.LINK.CAS"]]
link[["CHEBI"]] = row[["CH.LINK.CHEBI"]]
link[["HMDB"]] = row[["CH.LINK.HMDB"]]
link[["KEGG"]] = row[["CH.LINK.KEGG"]]
link[["LIPIDMAPS"]] = row[["CH.LINK.LIPIDMAPS"]]
link[["PUBCHEM"]] = row[["CH.LINK.PUBCHEM"]]
link[["INCHIKEY"]] = row[["CH.LINK.INCHIKEY"]]
link[["CHEMSPIDER"]] = row[["CH.LINK.CHEMSPIDER"]]
link[["COMPTOX"]] = row[["CH.LINK.COMPTOX"]]
link[which(is.na(link))] <- NULL
mbdata[["CH$LINK"]] <- link
## SP$SAMPLE
if(all(nchar(row[["SP.SAMPLE"]]) > 0, row[["SP.SAMPLE"]] != "NA", !is.na(row[["SP.SAMPLE"]]), na.rm = TRUE))
mbdata[['SP$SAMPLE']] <- row[["SP.SAMPLE"]]
return(mbdata)
}
#' Generate peak annotation from peaklist
#'
#' Generates the PK$ANNOTATION entry from the peaklist obtained. This function is
#' overridable by using the "annotator" option in the settings file.
#'
#' @param annotation A peak list to be annotated. Contains columns:
#' \code{"cpdID","formula","mzFound" ,"scan","mzCalc","dppm",
#' "dbe","mz","int","formulaCount","parentScan","fM_factor","dppmBest",
#' "formulaMultiplicity","intrel","mzSpec"}
#'
#' @param type The ion type to be added to annotated formulas ("+" or "-" usually)
#'
#' @return The annotated peak table. Table \code{colnames()} will be used for the
#' titles (preferrably don't use spaces in the column titles; however no format is
#' strictly enforced by the MassBank data format.
#'
#' @examples
#' \dontrun{
#' annotation <- annotator.default(annotation)
#' }
#' @author Michele Stravs, Eawag <stravsmi@@eawag.ch>
#' @export
annotator.default <- function(annotation, formulaTag)
{
if(!is.null(formulaTag))
type <- formulaTag
else
type <- ""
annotation <- annotation[!is.na(annotation$formula),,drop=FALSE]
annotation <- annotation[annotation$formula != "",,drop=FALSE]
annotation$formula <- paste(annotation$formula, rep(type, length(annotation$formula)), sep='')
# Select the right columns and name them correctly for output.
annotation <- annotation[,c("mz","formula", "formulaCount", "mzCalc", "dppm")]
colnames(annotation) <- c("m/z", "tentative_formula", "formula_count", "mass", "error(ppm)")
return(annotation)
}
#' Parse record title
#'
#' Parses a title for a single MassBank record using the title format
#' specified in the option titleFormat. Internally used, not exported.
#'
#' If the option is not set, a standard title format is used (for record definition
#' version 1 or 2).
#'
#' @usage .parseTitleString(mbrecord)
#' @param mbrecord A MassBank record in list format, as returned from
#' \code{\link{gatherSpectrum}}.
#' @return A string with the title.
#' @author Michael Stravs, Eawag
#' @seealso \code{\link{compileRecord}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#' \dontrun{
#' # used in compileRecord()
#' title <- .parseTitleString(mbrecord)
#' }
#'
#'
#'
.parseTitleString <- function(mbrecord)
{
varlist <- getOption("RMassBank")$titleFormat
# Set the standard title format.
if(is.null(varlist))
{
if(getOption("RMassBank")$use_version == 2)
{
varlist <- c(
"{CH$NAME}",
"{AC$INSTRUMENT_TYPE}",
"{AC$MASS_SPECTROMETRY: MS_TYPE}",
"CE: {RECORD_TITLE_CE}",
"R={AC$MASS_SPECTROMETRY: RESOLUTION}",
"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
)
}
else
{
varlist <- c(
"{CH$NAME}",
"{AC$INSTRUMENT_TYPE}",
"{AC$ANALYTICAL_CONDITION: MS_TYPE}",
"CE: {RECORD_TITLE_CE}",
"R={AC$ANALYTICAL_CONDITION: RESOLUTION}",
"{MS$FOCUSED_ION: PRECURSOR_TYPE}"
)
}
}
# Extract a {XXX} argument from each title section.
# check that every title has one and only one match
args <- regexec("\\{(.*)\\}", varlist)
arglist <- regmatches(varlist, args)
if(any(unlist(lapply(arglist, length)) != 2))
stop("Title format is incorrectly specified: a section with not exactly 1 parameters")
parsedVars <- lapply(varlist, function(var)
{
# Extract the specified parameter inside the {}.
# I.e. from a string like "R={BLA: BLUB}" return "BLA: BLUB"
args <- regexec("\\{(.*)\\}", var)
arg <- regmatches(var, args)[[1]][[2]]
# Split the parameter by colon if necessary
splitVar <- strsplit(arg, ": ")[[1]]
# Read the parameter value from the record
if(length(splitVar) == 2)
replaceVar <- mbrecord[[splitVar[[1]]]][[splitVar[[2]]]]
else if(length(splitVar) == 1)
replaceVar <- mbrecord[[splitVar]]
else
stop(paste(
"Title format is incorrectly specified:", var)
)
# Fix problems: NULL returns
if(is.null(replaceVar))
replaceVar <- ""
# Fix problems: Names will have >= 1 match. Take the first
if(length(replaceVar) > 1)
replaceVar <- replaceVar[[1]]
# Fix problems: Unknowns might have no name
if(!length(replaceVar)){
replaceVar <- ""
}
# Substitute the parameter value into the string
parsedVar <- sub("\\{(.*)\\}", replaceVar, var)
return(parsedVar)
})
title <- paste(parsedVars, collapse="; ")
return(title)
}
# This converts the tree-like list (as obtained e.g. from compileRecord())
# into a plain text array, which can then be dumped to a file suitable for
# MassBank upload.
#' Write MassBank record into character array
#'
#' Writes a MassBank record in list format to a text array.
#'
#' The function is a general conversion tool for the MassBank format; i.e. the
#' field names are not fixed. \code{mbdata} must be a named list, and the
#' entries can be as follows: \itemize{
#' \item A single text line:
#'
#' \code{'CH\$EXACT_MASS' = '329.1023'}
#'
#' is written as
#'
#' \code{CH\$EXACT_MASS: 329.1023}
#' \item A character array:
#'
#' \code{'CH\$NAME' = c('2-Aminobenzimidazole', '1H-Benzimidazol-2-amine')}
#'
#' is written as
#'
#' \code{CH\$NAME: 2-Aminobenzimidazole}
#'
#' \code{CH\$NAME: 1H-Benzimidazol-2-amine}
#'
#' \item A named list of strings:
#'
#' \code{'CH\$LINK' = list('CHEBI' = "27822", "KEGG" = "C10901")}
#'
#' is written as
#'
#' \code{CH\$LINK: CHEBI 27822}
#'
#' \code{CH\$LINK: KEGG C10901}
#'
#' \item A data frame (e.g. the peak table) is written as specified in
#' the MassBank record format (Section 2.6.3): the column names are used as
#' headers for the first line, all data rows are printed space-separated.
#' }
#'
#' @usage toMassbank(mbdata)
#' @param mbdata A MassBank record in list format.
#' @return The result is a text array, which is ready to be written to the disk
#' as a file.
#' @note The function iterates over the list item names. \bold{This means that
#' duplicate entries in \code{mbdata} are (partially) discarded!} The correct
#' way to add them is by making a character array (as specified above): Instead
#' of \code{'CH\$NAME' = 'bla', 'CH\$NAME' = 'blub'} specify \code{'CH\$NAME' =
#' c('bla','blub')}.
#' @author Michael Stravs
#' @seealso \code{\link{compileRecord}}, \code{\link{mbWorkflow}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#' \dontrun{
#' # Read just the compound info skeleton from the Internet for some compound ID
#' id <- 35
#' mbdata <- gatherData(id)
#' #' # Export the mbdata blocks to line arrays
#' # (there is no spectrum information, just the compound info...)
#' mbtext <- toMassbank(mbdata)
#' }
#'
#'
#' @export
setGeneric("toMassbank", function(o, ...) standardGeneric("toMassbank"))
#' @export
setMethod("toMassbank", "RmbSpectraSet", function(o, addAnnotation = getOption("RMassBank")$add_annotation)
{
lapply(o@children, function(s) toMassbank(s, addAnnotation))
})
#' @export
setMethod("toMassbank", "RmbSpectrum2", function(o, addAnnotation = getOption("RMassBank")$add_annotation)
{
.toMassbank(o, addAnnotation)
})
.toMassbank <- function (s, addAnnotation = getOption("RMassBank")$add_annotation)
{
peaks <- getData(s)
# check that peaks were normalized
if(!("intrel" %in% colnames(peaks)))
{
s <- normalize(s, slot="intrel")
peaks <- getData(s)
}
# Keep only peaks with relative intensity >= 1 o/oo, since the MassBank record
# makes no sense otherwise. Also, keep only the columns needed in the output.
peaks <- peaks[ peaks$intrel >= 1,,drop=FALSE]
peaks$mz <- round(peaks$mz, 4)
# Also format the other values, which are used in the annotation
peaks$dppm <- round(peaks$dppm, 2)
peaks$mzCalc <- round(peaks$mzCalc, 4)
peaks$intensity <- round(peaks$intensity, 1)
# Get polarity from Spectrum2 now!
formulaTag <- ""
if(s@polarity == 1) formulaTag <- "+"
if(s@polarity == 0) formulaTag <- "-"
# if polarity is -1, leave it unspecified. the "specs" seem to be 1 for +, 0 for - and -1 for ???
# (when reading mzML I often get -1, when reading mzXML I get 1 and 0 respectively)
annotator <- getOption("RMassBank")$annotator
if(is.null(annotator))
annotator <- "annotator.default"
annotation <- do.call(annotator, list(annotation= peaks, formulaTag = formulaTag))
peaks <- peaks[,c("mz", "intensity", "intrel")]
peaks <- unique(peaks)
# Name the columns correctly.
colnames(peaks) <- c("m/z", "int.", "rel.int.")
peaknum <- nrow(peaks)
mbdata <- s@info
mbdata[["PK$SPLASH"]] <- list(SPLASH = getSplash(peaks[,c("m/z", "int.")]))
# Annotation:
if(addAnnotation && (nrow(annotation) > 0))
mbdata[["PK$ANNOTATION"]] <- annotation
# Peak table
mbdata[["PK$NUM_PEAK"]] <- peaknum
mbdata[["PK$PEAK"]] <- peaks
# mbf is an array of lines and count is the line counter.
# Very old-school, but it works. :)
mbf <- character(0)
count <- 1
lapply(names(mbdata), function(entry)
{
# If entry is a char line, add it to the file.
# If it is a named sublist, add each subentry with name
# If it is an unnamed sublist, add each subentry without name
# if it is a dataframe, write in PEAKS mode
# Note: this is were I liked "lapply" a little too much. "for" would
# be more idiomatic, and wouldn't need the <<- assignments.
# Data frame: table mode. A header line and one space-separated line for
# each data frame row.
if(is.data.frame(mbdata[[entry]]))
{
mbf[[count]] <<- paste(entry,": " ,
paste(colnames(mbdata[[entry]]), collapse=" "),
sep='')
count <<- count+1
for(row in 1:nrow(mbdata[[entry]]))
{
mbf[[count]] <<- paste(" ",
paste(mbdata[[entry]][row,],collapse=" "),
sep="")
count <<- count+1
}
#browser()
}
# List with named items: Write every entry like CH$LINK: CAS 12-345-678
else if(is.list(mbdata[[entry]]) & !is.null(names(mbdata[[entry]])))
{
lapply(names(mbdata[[entry]]), function(subentry)
{
if(subentry != "SPLASH"){
mbf[[count]] <<- paste(entry,": ",subentry, " ", mbdata[[entry]][[subentry]], sep='')
} else {
mbf[[count]] <<- paste(entry,": ", mbdata[[entry]][[subentry]], sep='')
}
#print(mbf)
count <<- count + 1
})
}
# Array (or list) of unnamed items: Write every entry like CH$NAME: Paracetamol
# (iterative entry without subindices)
else if (length(mbdata[[entry]]) > 1 & is.null(names(mbdata[[entry]])))
{
lapply(mbdata[[entry]], function(subentry)
{
mbf[[count]] <<- paste(entry,": ",subentry, sep='')
#print(mbf)
count <<- count + 1
})
}
# Length is 1: just write the entry like PK$NUM_PEAKS: 131
else
{
mbf[[count]] <<- paste(entry,": ",mbdata[[entry]], sep='')
count <<- count + 1
}
}
) # End of lapply block (per child spectrum)
# Add mandatory EOF marker
mbf[[count]] <- "//"
return(mbf)
}
# Exports compiled and massbanked spectra, with their associated molfiles, to physical files.
# "compiled" is still used here, because we need an accessible accession number.
# In the plain text arrays, the accession number is already "hidden".
# compiled: is ONE "compiled" entry, i.e. ONE compound with e.g. 14 spectra.
# files: is a return value from lapply(toMassbank), i.e. contains 14 plain-text arrays
# (for a 14-spectra method)
# molfile: a molfile from createMolfile
#' Export internally stored MassBank data to files
#'
#' Exports MassBank recfile data arrays and corresponding molfiles to physical
#' files on hard disk, for one compound.
#'
#' The data from \code{compiled} is still used here, because it contains the
#' "visible" accession number. In the plain-text format contained in
#' \code{files}, the accession number is not "accessible" anymore since it's in
#' the file.
#'
#' @usage exportMassbank(compiled, files, molfile)
#' @param compiled Is ONE "compiled" entry, i.e. ONE compound with e.g. 14
#' spectra, as returned from \code{\link{compileRecord}}.
#' @param files A n-membered array (usually a return value from
#' \code{lapply(\link{toMassbank})}), i.e. contains n plain-text arrays with
#' MassBank records.
#' @param molfile A molfile from \code{\link{createMolfile}}
#' @return No return value.
#' @note An improvement would be to write the accession numbers into
#' \code{names(compiled)} and later into \code{names(files)} so \code{compiled}
#' wouldn't be needed here anymore. (The compound ID would have to go into
#' \code{names(molfile)}, since it is also retrieved from \code{compiled}.)
#' @author Michael Stravs
#' @seealso \code{\link{createMolfile}}, \code{\link{compileRecord}},
#' \code{\link{toMassbank}}, \code{\link{mbWorkflow}}
#' @references MassBank record format:
#' \url{http://www.massbank.jp/manuals/MassBankRecord_en.pdf}
#' @examples
#' \dontrun{
#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
#' mbfiles <- toMassbank(compiled)
#' molfile <- createMolfile(compiled[[1]][["CH$SMILES"]])
#' exportMassbank(compiled, mbfiles, molfile)
#' }
#'
#' @export
exportMassbank <- function(compiled, molfile = NULL)
{
exportMassbank_recdata(
compiled,
recDataFolder = file.path(getOption("RMassBank")$annotations$entry_prefix, "recdata")
)
if(!is.null(molfile)) {
exportMassbank_moldata(
compiled,
molfile,
molDataFolder = file.path(getOption("RMassBank")$annotations$entry_prefix, "moldata")
)
}
}
exportMassbank_recdata <- function(compiled, recDataFolder)
{
#mb@mbfiles <- lapply(mb@compiled_ok, function(cpd) toMassbank(cpd, mb@additionalPeaks))
files <- toMassbank(compiled)
names(files) <- lapply(compiled@children, function(c) c@info[["ACCESSION"]] )
molnames <- c()
for(file in seq_len(length(files)))
{
# Read the accession no. from the corresponding "compiled" entry
filename <- names(files)[[file]]
# use this accession no. as filename
filename <- paste(filename, ".txt", sep="")
filePath <- file.path(recDataFolder,filename)
write(files[[file]], filePath)
}
}
exportMassbank_moldata <- function(compiled, molfile, molDataFolder)
{
# Use internal ID for naming the molfiles
if(findLevel(compiled@id,TRUE)=="standard"){
molname <- sprintf("%04d", as.numeric(compiled@id))
molname <- paste(molname, ".mol", sep="")
write(molfile, file.path(molDataFolder,molname))
}
}
# Makes a list.tsv with molfile -> massbank ch$name attribution.
#' Write list.tsv file
#'
#' Makes a list.tsv file in the "moldata" folder.
#'
#' Generates the list.tsv file which is needed by MassBank to connect records with
#' their respective molfiles. The first compound name is linked to a mol-file with
#' the compound ID (e.g. 2334.mol for ID 2334).
#'
#' @param compiled A list of compiled spectra (in tree-format, as returned by \code{compileRecord}).
#' @return No return value.
#' @author Michael A. Stravs, Eawag <michael.stravs@@eawag.ch>
#' @examples \dontrun{
#' compiled <- compileRecord(record, mbdata, refilteredRcSpecs)
#' # a list.tsv for only one record:
#' clist <- list(compiled)
#' makeMollist(clist)
#' }
#' @export
makeMollist <- function(compiled)
{
# For every "compiled" entry (here, compiled is not one "compiled" entry but the total
# list of all compiled spectra), extract the uppermost CH$NAME and the ID (from the
# first spectrum.) Make the ID into 0000 format.
tsvlist <- t(sapply(compiled, function(entry)
{
name <- entry@children[[1]]@info[["CH$NAME"]][[1]]
id <- sprintf("%04d", as.numeric(entry@id))
molfilename <- paste(id,".mol",sep='')
return(c(name,molfilename))
}))
IDs <- sapply(compiled, function(entry) return( sprintf("%04d", as.numeric(
entry@id))))
level <- sapply(IDs, findLevel, compact=TRUE)
validentries <- which(level == "standard")
# Write the file with the
write.table(tsvlist[validentries,],
paste(getOption("RMassBank")$annotations$entry_prefix,"/moldata/list.tsv", sep=''),
quote = FALSE,
sep="\t",
row.names=FALSE,
col.names=FALSE
)
}
# Load a dataframe or file into additional_peaks (or add additional points in there.)
# The columns cpdID, scan, mzFound, int, OK are mandatory. OK=1 means that the peaks
# will be added into the spectrum. mzFound and int will be taken for the table.
# No annotation will be written.
# Add peaks to the spectra by hand
#' Add additional peaks to spectra
#'
#' Loads a table with additional peaks to add to the MassBank spectra. Required
#' columns are \code{cpdID, scan, int, mzFound, OK}.
#'
#' All peaks with OK=1 will be included in the spectra.
#'
#' @usage addPeaks(mb, filename_or_dataframe)
#' @param mb The \code{mbWorkspace} to load the peaks into.
#' @param filename_or_dataframe Filename of the csv file, or name of the R
#' dataframe containing the peaklist.
#' @return The \code{mbWorkspace} with loaded additional peaks.
#' @author Michael Stravs
#' @seealso \code{\link{mbWorkflow}}
#' @examples
#'
#' \dontrun{addPeaks("myrun_additionalPeaks.csv")}
#'
#' @export
addPeaks <- function(mb, filename_or_dataframe)
{
errorvar <- 0
currEnvir <- environment()
d <- 1
if(is.data.frame(filename_or_dataframe))
df <- filename_or_dataframe
else
tryCatch(
df <- read.csv(filename_or_dataframe),
error=function(e){
currEnvir$errorvar <- 1
})
# I change your heuristic fix to another heuristic fix, because I will have to test for a column name change...
if(!errorvar){
if(ncol(df) < 2)
df <- read.csv(filename_or_dataframe, sep=";")
# here: the column int was renamed to intensity, and we need to be able to read old files. sorry.
if(!("intensity" %in% colnames(df)) & ("int" %in% colnames(df)))
df$intensity <- df$int
cols <- c("cpdID", "scan", "mzFound", "intensity", "OK")
n <- colnames(df)
# Check if comma-separated or semicolon-separated
d <- setdiff(cols, n)
if(length(d)>0){
stop("Some columns are missing in the additional peak list. Needs at least cpdID, scan, mzFound, intensity, OK.")
}
}
culled_df <- df[,c("cpdID", "scan", "mzFound", "intensity", "OK")]
if(nrow(mb@additionalPeaks) == 0)
mb@additionalPeaks <- culled_df
else
mb@additionalPeaks <- rbind(mb@additionalPeaks, culled_df)
return(mb)
}
gatherDataMinimal.cpd <- function(cpd){
##Read from Compoundlist
if(length(cpd@smiles) == 1) smiles <- cpd@smiles
else
smiles <- ""
##Create
mbdata <- list()
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
# Confidence annotation and internal ID annotation.
# The ID of the compound will be written like:
# COMMENT: EAWAG_UCHEM_ID 1234
# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
if(length(cpd@id) > 0)
mbdata[["COMMENT"]][["ID"]] <- cpd@id
# here compound info starts
mbdata[['CH$NAME']] <- cpd@name
# Currently we use a fixed value for Compound Class, since there is no useful
# convention of what should go there and what shouldn't, and the field is not used
# in search queries.
mbdata[['CH$FORMULA']] <- cpd@formula
mbdata[['CH$EXACT_MASS']] <- round(findMz.formula(cpd@formula, "")$mzCenter, 4)
if(cpd@smiles != "")
mbdata[['CH$SMILES']] <- cpd@smiles
link <- list()
mbdata[['CH$LINK']] <- link
return(mbdata)
}
gatherDataMinimal.spectrum <- function(spectrum){
##Read from Compoundlist
if(length(cpd@smiles) == 1) smiles <- cpd@smiles
else
smiles <- ""
##Create
mbdata <- list()
mbdata[['ACCESSION']] <- ""
mbdata[['RECORD_TITLE']] <- ""
mbdata[['DATE']] <- format(Sys.Date(), "%Y.%m.%d")
# Confidence annotation and internal ID annotation.
# The ID of the compound will be written like:
# COMMENT: EAWAG_UCHEM_ID 1234
# if annotations$internal_id_fieldname is set to "EAWAG_UCHEM_ID"
# here compound info starts
mbdata[['CH$NAME']] <- paste("parent", spectrum@precursorMz, "at RT", spectrum@rt, "- CE", spectrum@collisionEnergy)
# Currently we use a fixed value for Compound Class, since there is no useful
# convention of what should go there and what shouldn't, and the field is not used
# in search queries.
return(mbdata)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.