# setting up configuration files for MetFragBetaPlus and performing the calcs
# E. Schymanski, 9/6/2015
# Rev. 1 16/11/2015 - MetFrag2.2
# NOTE: 15/2/2017: Moving to ReSOLUTION package
# C:/DATA/R/ReSOLUTION_scripts/MetFragConfigR.R
# NOTE 4/4/2018: MetFrag2.4.3 & 2.4.4MS-ready documenting etc.
# fresh download from http://c-ruttkies.github.io/MetFrag/projects/metfragcl/
# requires readxl
# set up default MetFrag Config Files
# minimum info is mass, adduct type, msms peak list name
#' Create MetFrag Configuration Files
#'
#' @description This function provides options to set up configuration files to run MetFrag Command Line
#' in batch mode. Minimum information is mass, adduct type and MS/MS peak list. MetFrag Command Line
#' is available from \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/}
#'
#' @usage MetFragConfig(mass, adduct_type, results_filename, peaklist_path, base_dir,
#' DB=c("PubChem"),
#' localDB_path="", output="XLS", token="", neutralPrecursorMass=FALSE,
#' ppm=5, mzabs=0.001, frag_ppm=5, IsPosMode=TRUE,
#' tree_depth=2, num_threads=1, add_refs=TRUE, minInt=0, rt_file_path="", rt_exp=0,suspect_path="",
#' suspect_filter=FALSE, UDS_Category="", UDS_Weights="", DB_IDs="", mol_form="", useFormula=FALSE,
#' useMoNAMetFusion=TRUE, useMonaIndiv=TRUE, MoNAoffline=TRUE, incl_el="",excl_el="", incl_exclusive=FALSE,
#' incl_smarts_filter="", incl_smarts_score="", excl_smarts_filter="",excl_smarts_score="", filter_isotopes=TRUE,
#' filter_by_InChIKey=TRUE)
#'
#' @param mass The mass with which to search the candidate database (\code{DB}). Use \code{neutralPrecursorMass} and
#' \code{adduct_type} to set whether this is monoisotopic mass or an adduct species.
#' @param adduct_type The adduct species used to define mass (if \code{neutralPrecursorMass=FALSE}) and fragmentation settings
#' in the config file, entered as either \code{PrecursorIonType} (text) or \code{PrecursorIonMode} (a number). The available
#' options are given in the system file \code{MetFrag_AdductTypes.csv} in the \code{extdata} folder. If
#' \code{neutralPrecursorMass=TRUE}, set \code{adduct_type=0}.
#' Recommended default values (if ion state is unclear) are \code{[M+H]+} (1) for positive and \code{[M-H]-} (-1) for negative mode.
#' @param results_filename Enter a base filename for naming the results files - do not include file endings
#' @param peaklist_path Enter the full path and file name to the peak list for this config file
#' @param base_dir Enter the directory name to set up the subfolders for MetFrag batch results. If the folders don't exist,
#' subfolders \code{config}, \code{log} and \code{results} are created; the output of this function is saved in \code{config}.
#' @param DB Enter query database name. Current options \code{KEGG}, \code{PubChem}, \code{ExtendedPubChem}, \code{ChemSpider},
#' \code{FOR-IDENT}, \code{MetaCyc}, \code{LocalCSV}, \code{LocalPSV} or \code{LocalSDF}. For \code{HMDB}, \code{LipidMaps} and
#' \code{KEGG-derivatised} use the \code{LocalCSV} option with respective files downloaded from
#' \url{https://msbi.ipb-halle.de/~cruttkie/databases/}.
#' @param localDB_path Full path and file name to the local database for \code{LocalCSV}, \code{LocalPSV} or \code{LocalSDF}. Otherwise leave empty.
#' If the file is not found, the config file defaults to \code{DB=PubChem}.
#' @param output Select output format(s) desired. Current options include one or more of
#' \code{SDF, XLS, CSV, ExtendedXLS, ExtendedFragmentsXLS} entered as a string. Not tested; incorrect entries will lead to CL failure.
#' @param token ChemSpider token, only required for \code{DB=ChemSpider}. See \url{http://www.chemspider.com/MassSpecAPI.asmx} for
#' more details about which services require tokens and \url{http://www.chemspider.com/help-create-a-chemspider-account.aspx} for
#' information how to obtain your token. If an invalid token is provided (not length=36), \code{DB} defaults to \code{PubChem}.
#' @param neutralPrecursorMass Controls whether \code{mass} is treated as a neutral or charged mass. If \code{TRUE}, treated
#' as neutral. If \code{FALSE} (default), this is entered as a charged mass, adjusted in MetFragCL with the \code{adduct_type} setting.
#' @param mol_form A string containing the molecular formula (used in candidate retrieval)
#' @param useFormula Default \code{FALSE} means an exact mass search is performed. If \code{TRUE}, \code{mol_form} must be given and
#' candidate retrieval is based on this formula. Note some databases are sensitive to the order of elements in the formula.
#' @param DB_IDs Use this to select only certain candidates using (comma-separated) database identifiers consistent with \code{DB}.
#' @param ppm The ppm error to perform the exact mass search for candidate retrieval (default 5 ppm)
#' @param mzabs The absolute error (in Da/Th) used to match fragments to observed MS/MS peaks. Additive with \code{frag_ppm}.
#' Default 0.001 Da (Th).
#' @param frag_ppm The relative error (in ppm) used to match fragments to observed MS/MS peaks. Additive with \code{mzabs}.
#' Default 5 ppm.
#' @param IsPosMode Controls the mode for both candidate retrieval and fragmentation consistently. Default \code{TRUE} sets
#' positive mode, switch to \code{FALSE} for negative mode data.
#' @param tree_depth Sets the number of fragmentation steps. Default=2 is recommended. Higher values lead to long calculation times.
#' @param num_threads Sets the number of threads used to run calculations. Default=1; set higher for faster results.
#' @param add_refs If set to (default) \code{TRUE}, reference scoring terms will be added for \code{DB=PubChem} and \code{DB=ChemSpider}.
#' Two terms (references, patents) are added for \code{PubChem}, weighted 0.5; four terms weighted 0.25 for \code{ChemSpider}.
#' These settings can be overwritten by setting \code{add_refs=FALSE} and adding the desired terms to \code{UDS_Category} and
#' \code{UDS_Weights}.
#' @param minInt Minimum intensity value to consider peaks in the MS/MS file. Default 0, this is merely a convenience function to
#' allow users to do a bare minimum noise reduction if required.
#' @param rt_file_path Full path to the CSV file containing InChIs and retention times (RTs) of standards to build the RT model.
#' The file should contain two comma-separated columns with a header row with the column names \code{InChI} and \code{RetentionTime}.
#' The example system file \code{Eawag_rt_inchi.csv} in the \code{extdata} folder is the correct dataset for Eawag MassBank
#' records measured on the XBridge C18 column.
#' @param rt_exp The experimental retention time. The chromatography and RT unit must match with the file in \code{rt_file_path}.
#' @param suspect_path Path to the suspect lists to be used as a filter or scoring term.
#' @param suspect_filter Default \code{FALSE} means suspect lists in \code{suspect_path} are used to increase the score of
#' candidates present in the suspect lists given (added as a scoring term).
#' If \code{TRUE}, suspect lists are used as a filter instead (only candidates present in the suspect lists are processed).
#' @param UDS_Category A string containing the exact column headers of additional User Defined Scores (UDS) to use, separated by
#' a comma. These column headers must match exactly, cannot be repeated and
#' must be present in the default database chosen or in the LocalCSV, PSV or SDF files used as a local database.
#' This can also be used to overwrite the default reference information in \code{add_refs}.
#' @param UDS_Weights A string containing comma-separated weight values corresponding to \code{UDS_Category}. This must
#' match exactly or an exception is thrown during processing.
#' @param useMoNAMetFusion Default \code{TRUE} means that the MoNA MetFusion Score is added by default. Use \code{FALSE} to exclude.
#' @param useMonaIndiv Default \code{TRUE} means that the MoNA Individual Score is added by default. Use \code{FALSE} to exclude.
#' This performs a direct lookup by InChIKey and returns the highest similarity value over all matches. A good match is a very good sign;
#' a poor match means there is a spectrum in MoNA for that compound but this may have been recorded with vastly different settings, so
#' a poor match does not necessarily indicate that the candidate is wrong.
#' @param MoNAoffline Default \code{TRUE} means the local MoNA instance (in the jar file) is used to avoid server issues. Use
#' \code{FALSE} to perform this live, however this may not work.
#' @param incl_el A string containing comma-separated elements that must be present in candidates. This allows coupling of an
#' exact mass search with the presence of elements containing distinct isotope patterns.
#' @param excl_el A string containing comma-separated elements that must not be present in candidates. This allows coupling of an
#' exact mass search with the absence of elements containing distinct isotope patterns.
#' @param incl_exclusive Default \code{FALSE} indicates that the elements in \code{incl_el} must be present, but other elements
#' could still be present. If \code{TRUE}, only these elements are considered (use this option with caution!)
#' @param incl_smarts_filter A string containing SMARTS codes (comma-separated) used to define substructures present (candidates that
#' do not contain these SMARTS are filtered out).
#' @param incl_smarts_score A string containing SMARTS codes (comma-separated) used to increase the score of candidates with
#' certain substructures present.
#' @param excl_smarts_filter A string containing SMARTS codes to exclude candidates with these substructures present.
#' @param excl_smarts_score A string containing SMARTS codes to penalize candidate scores with these substructures present.
#' @param filter_isotopes Default \code{TRUE} removes all candidates containing non-standard isotopes.
#' @param filter_by_InChIKey Default \code{TRUE} collapses the candidate result lists by the first block of the InChIKey, presenting
#' only the candidate with the best score across all categories. If \code{FALSE}, all candidates are included in the results.
#'
#' @return Creates a MetFrag config file matching the given parameters and returns the file name.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL author).
#'
#' @seealso \code{\link{runMetFrag}} to run the config files.
#'
#' @export
#'
#' @examples
#' # Do not run unless you adjusted test_dir to an existing file location
#' peaklist_path <- system.file("extdata","EA026206_Simazine_peaks.txt",package="ReSOLUTION")
#' # change this directory to an existing one, or this example won't work
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' testCSV <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
#'
#' config_file <- MetFragConfig(201.0776,"[M+H]+","Simazine_neutralMass_PubChem",peaklist_path, test_dir, DB="PubChem",neutralPrecursorMass=TRUE)
#' config_file2 <- MetFragConfig(202.0854,1,"Simazine_precMass_localCSV",peaklist_path,test_dir,DB="LocalCSV",localDB_path=testCSV)
#' config_file2 <- MetFragConfig(202.0854,1,"Simazine_precMass_10ppm",peaklist_path,test_dir,DB="LocalCSV",localDB_path=testCSV,ppm=10)
#' config_file2 <- MetFragConfig(202.0854,1,"Simazine_precMass_10ppm_InChIFilterOff",peaklist_path,test_dir,DB="LocalCSV",
#' localDB_path=testCSV,ppm=10,filter_by_InChIKey = FALSE)
#'
#' #to find out the adduct states:
#' MetFragAdductTypes <- read.csv(system.file("extdata","MetFrag_AdductTypes.csv",package="ReSOLUTION"))
#'
#' # to run the config files
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' # warning: this first query takes a while, for quick testing run config_file2
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#' runMetFrag(config_file2, metfrag_dir, MetFragCL_name)
#'
MetFragConfig <- function(mass, adduct_type, results_filename, peaklist_path, base_dir,
                          DB=c("PubChem"),
                          localDB_path="",output="XLS", token="",
                          neutralPrecursorMass=FALSE, mol_form="",useFormula=FALSE,DB_IDs="",
                          ppm=5, mzabs=0.001, frag_ppm=5, IsPosMode=TRUE,
                          tree_depth=2, num_threads=1, add_refs=TRUE, minInt=0,
                          rt_file_path="",rt_exp=0,suspect_path="",suspect_filter=FALSE,
                          UDS_Category="",UDS_Weights="",
                          useMoNAMetFusion=TRUE,useMonaIndiv=TRUE,MoNAoffline=TRUE,
                          incl_el="",excl_el="",incl_exclusive=FALSE,
                          incl_smarts_filter="",incl_smarts_score="",
                          excl_smarts_filter="",excl_smarts_score="",
                          filter_isotopes=TRUE,filter_by_InChIKey=TRUE) {
  # Writes a "key = value" MetFragCL configuration file to
  # <base_dir>/config/<results_filename>_config.txt and returns that path.
  # All input validation that can stop() happens BEFORE the file is opened,
  # so a rejected call never leaves a half-written config file behind.

  # ---- database validation ----
  remote_dbs <- c("KEGG", "PubChem", "ExtendedPubChem", "ChemSpider",
                  "FOR-IDENT", "MetaCyc")
  local_dbs <- c("LocalCSV", "LocalPSV", "LocalSDF")
  if (length(DB) != 1) {
    stop("Please define only one database")
  }
  if (!(DB %in% c(remote_dbs, local_dbs))) {
    stop(paste("Incorrect database: select one of KEGG, PubChem, ExtendedPubChem,",
               "ChemSpider, FOR-IDENT, MetaCyc, LocalCSV, LocalPSV or LocalSDF"))
  }

  # ---- adduct validation ----
  adduct_csv <- system.file("extdata", "MetFrag_AdductTypes.csv", package = "ReSOLUTION")
  MetFragAdductTypes <- read.csv(adduct_csv)
  adduct_num_list <- MetFragAdductTypes$PrecursorIonMode
  adduct_name_list <- as.character(MetFragAdductTypes$PrecursorIonType)
  isPosIonMode_list <- MetFragAdductTypes$IsPositiveIonMode
  bad_adduct_msg <- paste0("Incorrect adduct type: supported adducts are listed in ",
                           adduct_csv)
  if (length(adduct_type) != 1) {
    stop(bad_adduct_msg)
  }
  # Exact matching: a substring search would let e.g. the number 4 hit
  # "[M+NH4]+" and be written out as an (invalid) PrecursorIonType.
  name_idx <- match(as.character(adduct_type), adduct_name_list)
  adduct_type_name <- !is.na(name_idx)
  adduct_type_num <- adduct_type %in% adduct_num_list
  if (!(adduct_type_name || adduct_type_num)) {
    stop(bad_adduct_msg)
  }

  # Decide which precursor key (if any) goes into the config, and check that
  # the adduct polarity agrees with IsPosMode.
  mode_mismatch_msg <- "The adduct_type and IsPosMode settings mismatch. Please check and retry"
  precursor_key <- NULL
  if (adduct_type_name) {
    if (!isTRUE(isPosIonMode_list[name_idx] == IsPosMode)) {
      stop(mode_mismatch_msg)
    }
    precursor_key <- "PrecursorIonType"
  } else if (adduct_type %in% adduct_num_list[1]) {
    # First table entry is handled as the "zero"/neutral adduct: no polarity
    # check, and only written when the mass is not already neutral.
    # NOTE(review): assumes row 1 of MetFrag_AdductTypes.csv is mode 0 - confirm.
    if (!neutralPrecursorMass) {
      precursor_key <- "PrecursorIonMode"
    }
  } else {
    adduct_mode <- isPosIonMode_list[match(adduct_type, adduct_num_list)]
    if (!isTRUE(adduct_mode == IsPosMode)) {
      stop(mode_mismatch_msg)
    }
    precursor_key <- "PrecursorIonMode"
  }

  # ---- output locations ----
  config_dir <- file.path(base_dir, "config")
  ResultsPath <- file.path(base_dir, "results")
  for (out_dir in c(config_dir, ResultsPath)) {
    if (!file.exists(out_dir)) {
      dir.create(out_dir, recursive = TRUE)
    }
  }
  config_name <- file.path(config_dir, paste0(results_filename, "_config.txt"))

  # Open (and truncate) the config file; on.exit guarantees the connection is
  # closed even if a later step fails.
  file.conn <- file(config_name, open = "wt")
  on.exit(close(file.conn), add = TRUE)
  write_setting <- function(key, value) {
    writeLines(paste0(key, " = ", as.character(value)), con = file.conn)
  }

  # ---- sample, precursor and mass settings ----
  write_setting("SampleName", results_filename)
  write_setting("ResultsPath", ResultsPath)
  write_setting("PeakListPath", peaklist_path)
  if (!is.null(precursor_key)) {
    write_setting(precursor_key, adduct_type)
  }
  if (useFormula) {
    if (!nzchar(mol_form)) {
      warning("useFormula=TRUE but mol_form is empty; the formula entry will be blank")
    }
    write_setting("NeutralPrecursorMolecularFormula", mol_form)
  }
  if (neutralPrecursorMass) {
    write_setting("NeutralPrecursorMass", mass)
  } else {
    write_setting("IonizedPrecursorMass", mass)
  }

  # ---- search / fragmentation settings ----
  write_setting("IsPositiveIonMode", if (IsPosMode) "True" else "False")
  write_setting("DatabaseSearchRelativeMassDeviation", ppm)
  if (minInt > 0) {
    # optional: bare-minimum noise filter
    write_setting("MinimumAbsolutePeakIntensity", minInt)
  }
  write_setting("FragmentPeakMatchAbsoluteMassDeviation", mzabs)
  write_setting("FragmentPeakMatchRelativeMassDeviation", frag_ppm)
  write_setting("MaximumTreeDepth", tree_depth)
  write_setting("NumberThreads", num_threads)
  write_setting("MetFragCandidateWriter", output)

  # Pre-processing filters and score terms are accumulated as vectors and
  # written (comma-joined) at the very end.
  pre_filters <- "UnconnectedCompoundFilter"
  if (filter_isotopes) {
    # removes candidates with non-standard isotopes
    pre_filters <- c(pre_filters, "IsotopeFilter")
  }
  score_types <- "FragmenterScore"
  score_weights <- "1.0"

  # ---- database selection ----
  if (DB == "ChemSpider" && nchar(token) < 36) {
    warning("Invalid ChemSpider Token, switching to PubChem")
    DB <- "PubChem"
  }
  # Taken AFTER the token fallback so the written type follows the fallback.
  MetFragDatabaseType <- DB
  if (add_refs && DB == "ChemSpider") {
    score_types <- c(score_types, "ChemSpiderReferenceCount",
                     "ChemSpiderDataSourceCount",
                     "ChemSpiderNumberPubMedReferences", "ChemSpiderRSCCount")
    score_weights <- c(score_weights, rep("0.25", 4))
  } else if (add_refs && DB %in% c("PubChem", "ExtendedPubChem")) {
    # reference/patent counts are only available from ExtendedPubChem
    MetFragDatabaseType <- "ExtendedPubChem"
    score_types <- c(score_types, "PubChemNumberPubMedReferences",
                     "PubChemNumberPatents")
    score_weights <- c(score_weights, "0.5", "0.5")
  } else if (DB %in% local_dbs) {
    if (file.exists(localDB_path)) {
      write_setting("LocalDatabasePath", localDB_path)
    } else {
      warning("Local database file does not exist, defaulting to PubChem without references")
      MetFragDatabaseType <- "PubChem"
    }
  }
  # Remaining validated remote databases (KEGG, MetaCyc, FOR-IDENT, or
  # PubChem/ChemSpider with add_refs=FALSE) use FragmenterScore only.
  write_setting("MetFragDatabaseType", MetFragDatabaseType)
  if (nzchar(token) && MetFragDatabaseType == "ChemSpider") {
    write_setting("ChemSpiderToken", token)
  }
  if (nzchar(DB_IDs)) {
    write_setting("PrecursorCompoundIDs", DB_IDs)
  }

  # ---- retention time score ----
  if (nzchar(rt_file_path) && rt_exp > 0) {
    if (file.exists(rt_file_path)) {
      score_types <- c(score_types, "RetentionTimeScore")
      score_weights <- c(score_weights, "1.0")
      write_setting("RetentionTimeTrainingFile", rt_file_path)
      write_setting("ExperimentalRetentionTimeValue", rt_exp)
    } else {
      warning("Retention time training file not found, RetentionTimeScore not added")
    }
  }

  # ---- suspect lists: either a filter or a scoring term ----
  if (nzchar(suspect_path)) {
    if (suspect_filter) {
      pre_filters <- c(pre_filters, "SuspectListFilter")
      write_setting("FilterSuspectLists", suspect_path)
    } else {
      score_types <- c(score_types, "SuspectListScore")
      score_weights <- c(score_weights, "1.0")
      write_setting("ScoreSuspectLists", suspect_path)
    }
  }

  # ---- MoNA scores (offline = bundled in the jar, otherwise live lookup) ----
  mona_terms <- if (MoNAoffline) {
    c("OfflineMetFusionScore", "OfflineIndividualMoNAScore")
  } else {
    c("MetFusionMoNAScore", "IndividualMoNAScore")
  }
  if (useMoNAMetFusion) {
    score_types <- c(score_types, mona_terms[1])
    score_weights <- c(score_weights, "1.0")
  }
  if (useMonaIndiv) {
    score_types <- c(score_types, mona_terms[2])
    score_weights <- c(score_weights, "1.0")
  }

  # ---- element inclusion / exclusion filters ----
  if (nzchar(incl_el)) {
    # the exclusive variant is very restrictive, hence FALSE by default
    pre_filters <- c(pre_filters,
                     if (incl_exclusive) "ElementInclusionExclusiveFilter"
                     else "ElementInclusionFilter")
    write_setting("FilterIncludedElements", incl_el)
  }
  if (nzchar(excl_el)) {
    pre_filters <- c(pre_filters, "ElementExclusionFilter")
    write_setting("FilterExcludedElements", excl_el)
  }

  # ---- SMARTS filters ----
  if (nzchar(incl_smarts_filter)) {
    pre_filters <- c(pre_filters, "SmartsSubstructureInclusionFilter")
    write_setting("FilterSmartsInclusionList", incl_smarts_filter)
  }
  if (nzchar(excl_smarts_filter)) {
    pre_filters <- c(pre_filters, "SmartsSubstructureExclusionFilter")
    write_setting("FilterSmartsExclusionList", excl_smarts_filter)
  }

  # ---- SMARTS scores (each list is written from its own *_score argument) ----
  if (nzchar(incl_smarts_score)) {
    score_types <- c(score_types, "SmartsSubstructureInclusionScore")
    score_weights <- c(score_weights, "1.0")
    write_setting("ScoreSmartsInclusionList", incl_smarts_score)
  }
  if (nzchar(excl_smarts_score)) {
    score_types <- c(score_types, "SmartsSubstructureExclusionScore")
    score_weights <- c(score_weights, "1.0")
    write_setting("ScoreSmartsExclusionList", excl_smarts_score)
  }

  # ---- user defined scores ----
  # Nothing extra is written for these: the columns must exist in the database.
  if (nzchar(UDS_Category)) {
    score_types <- c(score_types, UDS_Category)
    # Accept both "1,1" (as documented) and ",1,1" (leading comma, as produced
    # by CompToxXLStoLocalCSVterms) so the joined weight string is well-formed.
    uds_weights <- sub("^,", "", UDS_Weights)
    if (nzchar(uds_weights)) {
      score_weights <- c(score_weights, uds_weights)
    } else {
      warning("UDS_Category given without UDS_Weights; score weights will not match score types")
    }
  }

  # ---- processing filters and score summary ----
  write_setting("MetFragPreProcessingCandidateFilter",
                paste(pre_filters, collapse = ","))
  if (filter_by_InChIKey) {
    write_setting("MetFragPostProcessingCandidateFilter", "InChIKeyFilter")
  }
  write_setting("MetFragScoreWeights", paste(score_weights, collapse = ","))
  write_setting("MetFragScoreTypes", paste(score_types, collapse = ","))

  config_name
}
####run MetFrag ####
#' Run MetFrag Command Line from config files
#'
#' @description This function runs MetFrag Command Line for the given configuration file and directories.
#' Note that MetFragCL must be available locally to run this. MetFrag Command Line
#' is available from \url{http://c-ruttkies.github.io/MetFrag/projects/metfragcl/}
#'
#' @usage runMetFrag(config_file, MetFrag_dir, CL_name, config_dir=dirname(config_file))
#'
#' @param config_file Full path and file name to the configuration file (as returned by \code{\link{MetFragConfig}})
#' @param MetFrag_dir Full path to the directory containing the MetFragCL jar file
#' @param CL_name Name of the exact MetFragCL jar file to use (e.g. \code{MetFrag2.4.4-msready-CL.jar})
#' @param config_dir Full path to the directory in which config files are located. Note that parallel directories
#' \code{log} and \code{results} are created during this process for the first config file run. Defaults to the
#' directory where \code{config_file} is located.
#'
#' @return Runs MetFragCL and creates a log file and, where successful, results files as encoded in the config file.
#' If unsuccessful a status message is printed from the jar; details are saved in the log file.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL author).
#'
#' @seealso \code{\link{MetFragConfig}}
#'
#' @export
#'
#' @examples
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' peaklist_path <- system.file("extdata","EA026206_Simazine_peaks.txt",package="ReSOLUTION")
#' # change this directory to an existing one, or this example won't work
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' testCSV <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
#'
#' config_file <- MetFragConfig(201.0776,"[M+H]+","Simazine_neutralMass",peaklist_path, test_dir, DB="PubChem",neutralPrecursorMass=TRUE)
#' config_file2 <- MetFragConfig(202.0854,1,"Simazine_precMass",peaklist_path,test_dir,DB="LocalCSV",localDB_path=testCSV)
#'
#' #note this first query takes a while to run, try config_file2 for a quicker test.
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#' runMetFrag(config_file2, metfrag_dir, MetFragCL_name)
#'
runMetFrag <- function(config_file, MetFrag_dir, CL_name, config_dir=dirname(config_file)) {
  # Runs the MetFragCL jar on one config file and stores the console output in
  # a log file. The log location mirrors the config location with "config"
  # replaced by "log" in the directory name and in the file name
  # (<base>/config/x_config.txt -> <base>/log/x_log.txt).
  # Returns the log file path invisibly.
  if (!(file.exists(config_file) && file.exists(config_dir))) {
    stop(paste("Configuration file ", config_file,
               " or directory not found, please try a new file"), call. = FALSE)
  }

  # Make paths absolute before changing directory, otherwise a relative
  # config_file would no longer resolve once we are inside MetFrag_dir.
  config_file <- normalizePath(config_file, winslash = "/", mustWork = TRUE)
  config_dir <- normalizePath(config_dir, winslash = "/", mustWork = TRUE)

  # Only the last directory component and the file name are rewritten, so a
  # "config" somewhere higher up in the path is left untouched.
  to_log <- function(path_part) gsub("config", "log", path_part, fixed = TRUE)
  log_dir <- file.path(dirname(config_dir), to_log(basename(config_dir)))
  config_parent <- dirname(config_file)
  log_file <- file.path(dirname(config_parent), to_log(basename(config_parent)),
                        to_log(basename(config_file)))
  if (identical(log_file, config_file)) {
    # nothing to rename: never overwrite the config file with the log
    log_file <- paste0(config_file, ".log")
  }
  for (needed_dir in unique(c(log_dir, dirname(log_file)))) {
    if (!file.exists(needed_dir)) {
      dir.create(needed_dir, recursive = TRUE)
    }
  }

  # Run from the MetFrag directory so the jar is found by name; always restore
  # the caller's working directory, even if the java call fails.
  previous_dir <- setwd(MetFrag_dir)
  on.exit(setwd(previous_dir), add = TRUE)

  # Quote every path so directories containing spaces survive the command line.
  MetFragCommand <- paste0("java -Duser.home=", shQuote(MetFrag_dir),
                           " -jar ", shQuote(CL_name), " ", shQuote(config_file))
  MetFrag_out <- system(command = MetFragCommand, intern = TRUE,
                        show.output.on.console = FALSE)
  writeLines(MetFrag_out, log_file)
  invisible(log_file)
}
##### Prepare CompTox Dashboard XLS files for MetFrag #####
#' Prepare CompTox Dashboard MetFrag Export XLS files for MetFragCL
#'
#' @description This function prepares CompTox XLS Export files for use in MetFragCL. It converts the
#' XLS to a CSV and extracts numeric metadata field names for inclusion as scoring terms.
#'
#' @usage CompToxXLStoLocalCSVterms(xls_file, start_index=15, csv_file= "")
#'
#' @param xls_file Full path and file name to the Dashboard Export file to process
#' @param start_index The column number where the metadata columns start (default \code{15} is appropriate
#' for CompTox Dashboard MetFragBeta Export file format released March 2018)
#' @param csv_file If empty, the CSV file will have the same name as \code{xls_file} except the file ending.
#' Enter a file name here if you wish to have a different named file. If a CSV file
#' with the same name exists in the directory it is overwritten.
#'
#' @return Returns a list containing the CSV file name, a list of scoring terms to add to
#' the MetFrag config file and a corresponding score weights entry.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}
#'
#' @export
#'
#' @examples
#' CompToxXLS <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2.xls",package="ReSOLUTION")
#' LocalCSVterms <- CompToxXLStoLocalCSVterms(CompToxXLS)
#'
CompToxXLStoLocalCSVterms <- function(xls_file, start_index=15, csv_file= "") {
  # Converts a Dashboard export workbook to CSV and lists the metadata columns
  # (from start_index onwards) holding at least one numeric value, for use as
  # MetFrag scoring terms.
  # Returns a list with:
  #   CSV          - path of the CSV file that was written
  #   ScoreTerms   - comma-separated names of the usable columns
  #   ScoreWeights - one weight of 1 per term, each preceded by a comma
  #                  (e.g. ",1,1"); empty string when no column qualifies

  # Default CSV name: swap only a trailing .xls/.xlsx extension. The pattern is
  # anchored and the dot escaped so "xls" elsewhere in the path is left alone.
  if (nchar(csv_file) < 1) {
    csv_file <- sub("\\.xlsx?$", ".csv", xls_file, ignore.case = TRUE)
    if (identical(csv_file, xls_file)) {
      # no recognised extension: append rather than overwrite the input
      csv_file <- paste0(xls_file, ".csv")
    }
  }

  # Read the workbook once; column names come from the workbook because
  # read.csv() may alter them when reading the CSV back.
  xls_content <- read_excel(xls_file)
  write.csv(xls_content, csv_file, row.names = FALSE)
  cols <- colnames(xls_content)
  # The numeric test runs on the CSV that was just written.
  csv_content <- read.csv(csv_file)

  # Candidate columns from start_index to the last one; an empty set (instead
  # of a backwards sequence) when start_index lies beyond the last column.
  candidate_cols <- seq_along(cols)
  candidate_cols <- candidate_cols[candidate_cols >= start_index]

  # Keep columns with ANY numeric value, to avoid errors when running the
  # MetFragCL jar on purely textual columns.
  has_numeric <- vapply(candidate_cols, function(col_i) {
    as_number <- suppressWarnings(as.numeric(as.character(csv_content[[col_i]])))
    any(!is.na(as_number))
  }, logical(1))
  include_col_i <- candidate_cols[has_numeric]

  # score terms and matching weights
  ScoreTerms <- paste(cols[include_col_i], collapse = ",")
  ScoreWeights <- if (length(include_col_i) > 0) {
    paste0(",", paste(rep(1, length(include_col_i)), collapse = ","))
  } else {
    ""
  }

  list(CSV = csv_file, ScoreTerms = ScoreTerms, ScoreWeights = ScoreWeights)
}
##### Prepare CompTox Dashboard CSV Batch files for MetFrag #####
#' Prepare CompTox Dashboard MetFrag Export CSV files for MetFragCL
#'
#' @description This function prepares CompTox CSV Export files for use in MetFragCL. It
#' extracts numeric metadata field names for inclusion as scoring terms.
#'
#' @usage CompToxCSVtoLocalCSVterms(csv_file, start_index=15)
#'
#' @param csv_file Full path and file name to the Dashboard Export file to process
#' @param start_index The column number where the metadata columns start (default \code{15} is appropriate
#' for CompTox Dashboard MetFragBeta Export file format released March 2018)
#'
#' @return Returns a list containing the CSV file name, a list of scoring terms to add to
#' the MetFrag config file and a corresponding score weights entry.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}
#'
#' @export
#'
#' @examples
#' CompToxCSV <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2_wSelectMetaData.csv",package="ReSOLUTION")
#' LocalCSVterms <- CompToxCSVtoLocalCSVterms(CompToxCSV)
#'
CompToxCSVtoLocalCSVterms <- function(csv_file, start_index=15) {
  # Collects the headers of all columns from start_index onwards that contain
  # at least one numeric value, for use as MetFrag scoring terms.
  # Returns list(CSV, ScoreTerms, ScoreWeights).

  # read in the content, keeping the column headers exactly as in the file
  csv_content <- read.csv(csv_file, check.names = FALSE)
  cols <- colnames(csv_content)
  # candidate metadata columns; empty if start_index lies beyond the last
  # column (start_index:length(cols) would otherwise count backwards and fail)
  n_cols <- length(cols)
  candidate_i <- if (start_index <= n_cols) seq(start_index, n_cols) else integer(0)
  # keep a column if ANY of its values is numeric, to avoid errors when
  # running the MetFragCL jar
  has_numeric <- vapply(candidate_i, function(i) {
    values <- suppressWarnings(as.numeric(as.character(csv_content[[i]])))
    any(!is.na(values))
  }, logical(1))
  include_col_i <- candidate_i[has_numeric]
  # calculate the score terms and weights (one weight of 1 per term, with a
  # leading comma). With no terms there is nothing to weight, so return ""
  # (the UDS_Weights default of MetFragConfig) rather than a dangling ",".
  n_terms <- length(include_col_i)
  ScoreTerms <- paste(cols[include_col_i], collapse = ",")
  ScoreWeights <- if (n_terms > 0) {
    paste0(",", paste(rep(1, n_terms), collapse = ","))
  } else {
    ""
  }
  # generate the output
  list(CSV = csv_file, ScoreTerms = ScoreTerms, ScoreWeights = ScoreWeights)
}
###### Prepare CompTox Dashboard SDF files for MetFrag #####
#' Prepare CompTox Dashboard MetFrag Export SDF files for MetFragCL
#'
#' @description This function prepares CompTox SDF MetFrag Export files for use in MetFragCL. It reads the
#' SDF and extracts numeric metadata field names for inclusion as scoring terms.
#'
#' @usage CompToxSDFtoLocalSDFterms(SDF_file)
#'
#' @param SDF_file Full path and file name to the Dashboard Export SDF file
#'
#' @return Returns a list containing the SDF file name, a list of scoring terms to add to
#' the MetFrag config file and a corresponding score weights entry.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}, \code{\link{process.sdf.file}}
#'
#' @export
#'
#' @examples
#' CompToxSDF <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2.sdf",package="ReSOLUTION")
#' LocalSDFterms <- CompToxSDFtoLocalSDFterms(CompToxSDF)
#'
#'
CompToxSDFtoLocalSDFterms <- function(SDF_file) {
  # Collects the SDF tags that process.sdf.file() flags as TRUE and returns
  # them as MetFrag scoring terms with one weight of 1 per term.
  # Returns list(SDF, ScoreTerms, ScoreWeights).

  # first, process the SDF using Christoph's processSDF function
  # NOTE(review): presumably one TRUE/FALSE flag per tag marking numeric
  # content - confirm against process.sdf.file
  SDF_properties <- process.sdf.file(SDF_file)
  SDF_tags <- names(SDF_properties)
  # entries that read as TRUE mark the tags to use for scoring
  score_tags <- SDF_tags[grep(TRUE, SDF_properties)]
  # weights carry a leading comma. With no terms there is nothing to weight,
  # so return "" (the UDS_Weights default of MetFragConfig) rather than a
  # dangling ",".
  n_terms <- length(score_tags)
  ScoreTerms <- paste(score_tags, collapse = ",")
  ScoreWeights <- if (n_terms > 0) {
    paste0(",", paste(rep(1, n_terms), collapse = ","))
  } else {
    ""
  }
  list(SDF = SDF_file, ScoreTerms = ScoreTerms, ScoreWeights = ScoreWeights)
}
##### Prepare CompTox Dashboard Full CSV file for MetFrag #####
#' Prepare CompTox Dashboard Full CSV file for MetFragCL
#'
#' @description This function extracts metadata headers from the CompTox CSV download file
#' for use in MetFragCL. It reads metadata field names for inclusion as scoring terms.
#'
#' @usage CompToxFullCSVtoLocalCSVterms(csv_file, start_index=13, TermsToRemove="default")
#'
#' @param csv_file Full path and file name to the Dashboard CSV Download file to process
#' @param start_index The column number where the metadata columns start (default \code{13} is appropriate
#' for CompTox Dashboard Download file designed specifically for MetFrag)
#' @param TermsToRemove Define column headers to remove from scoring terms. Select default values
#' using \code{"default"} (see details) or supply a character vector of column headers built with \code{c()}.
#'
#' @return Returns a list containing the CSV file name, a list of scoring terms to add to
#' the MetFrag config file and a corresponding score weights entry.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @details The current default \code{TermsToRemove} are
#' \code{c("TOXCAST_NUMBER_OF_ASSAYS/TOTAL","TOXVAL_Link", "PPRTV_Link", "IRIS_Link")}.
#' This option removes the terms from MetFrag scoring to avoid processing errors,
#' but these columns are retained in the results file, for downstream use if desired.
#' If MetFrag exits with a status=5, check the log file for terms to add to this list.
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}
#'
#' @export
#'
#' @examples
#' # note this is an example based on a limited file
#' CompToxFullCSVFile <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
#' LocalCSVterms <- CompToxFullCSVtoLocalCSVterms(CompToxFullCSVFile)
#'
CompToxFullCSVtoLocalCSVterms <- function(csv_file, start_index=13, TermsToRemove="default") {
  # Builds MetFrag scoring terms from the column headers of a CompTox CSV
  # download: every header from start_index onwards is used, except those
  # containing one of TermsToRemove. No numeric test is performed, as this is
  # designed to work on a specific download file.
  # Returns list(CSV, ScoreTerms, ScoreWeights).

  # extract column names; only the first row is read and not the entire
  # file, as this is huge
  cols <- colnames(read.csv(csv_file, nrows = 1, check.names = FALSE))
  # use the default definition only for the exact single value "default";
  # anything else is taken as the user's own list of headers
  if (length(TermsToRemove) == 1L &&
      identical(as.character(TermsToRemove), "default")) {
    TermsToRemove <- c("TOXCAST_NUMBER_OF_ASSAYS/TOTAL", "TOXVAL_Link",
                       "PPRTV_Link", "IRIS_Link")
  }
  TermsToRemove <- as.character(TermsToRemove)
  TermsToRemove <- TermsToRemove[!is.na(TermsToRemove)]
  # candidate terms; empty if start_index lies beyond the last column
  n_cols <- length(cols)
  score_terms <- if (start_index <= n_cols) {
    cols[seq(start_index, n_cols)]
  } else {
    character(0)
  }
  # flag every term that contains one of the strings to remove (literal
  # substring match). A logical mask is used rather than negative indices,
  # so terms that are absent from the file, or that match several columns,
  # cause no error, and an empty TermsToRemove simply removes nothing.
  drop_term <- rep(FALSE, length(score_terms))
  for (term in TermsToRemove) {
    drop_term <- drop_term | grepl(term, score_terms, fixed = TRUE)
  }
  kept_terms <- score_terms[!drop_term]
  # calculate the score terms and weights (one weight of 1 per kept term,
  # always with a leading comma). With no terms there is nothing to weight,
  # so return "" (the UDS_Weights default of MetFragConfig) rather than a
  # dangling ",".
  n_terms <- length(kept_terms)
  ScoreTerms <- paste(kept_terms, collapse = ",")
  ScoreWeights <- if (n_terms > 0) {
    paste0(",", paste(rep(1, n_terms), collapse = ","))
  } else {
    ""
  }
  # generate the output
  list(CSV = csv_file, ScoreTerms = ScoreTerms, ScoreWeights = ScoreWeights)
}
##### Create MetFrag Config Files with CompTox LocalCSV Scoring Terms from MetFrag Export #####
#' Create MetFrag Config Files with LocalCSV and Scoring Terms from CompTox MetFrag XLS Export
#'
#' @description This is a CompTox XLS or CSV specific wrapper function for \code{\link{MetFragConfig}}.
#'
#' @usage MetFragConfig.CompToxCSV(mass, adduct_type, results_filename, peaklist_path, base_dir,
#' CompToxLocalCSVterms, ...)
#'
#' @param mass The mass with which to search the candidate database (\code{DB}). Use \code{neutralPrecursorMass} and
#' \code{adduct_type} to set whether this is monoisotopic mass or an adduct species. By default
#' (\code{neutralPrecursorMass=FALSE}) it is treated as the mass of the \code{adduct_type} species.
#' @param adduct_type The adduct species used to define mass (if \code{neutralPrecursorMass=FALSE}) and fragmentation settings
#' in the config file, entered as either \code{PrecursorIonType} (text) or \code{PrecursorIonmode} (a number). The available
#' options are given in the system file \code{MetFragAdductTypes.csv} in the \code{extdata} folder.
#' Recommended default values (if ion state is unclear) are \code{[M+H]+} (1) for positive and \code{[M-H]-} (-1) for negative mode.
#' @param results_filename Enter a base filename for naming the results files - do not include file endings
#' @param peaklist_path Enter the full path and file name to the peak list for this config file
#' @param base_dir Enter the directory name to set up the subfolders for MetFrag batch results. If the folders don't exist,
#' subfolders \code{config}, \code{log} and \code{results} are created; the output of this function is saved in \code{config}.
#' @param CompToxLocalCSVterms The output of \code{\link{CompToxXLStoLocalCSVterms}}, \code{\link{CompToxCSVtoLocalCSVterms}}
#' or \code{\link{CompToxFullCSVtoLocalCSVterms}}, used to set
#' \code{DB, localDB_path, UDS_Category and UDS_Weights} in \code{\link{MetFragConfig}}
#'
#' @return Returns a MetFrag config file name
#'
#' @details Remaining parameters are described in \code{\link{MetFragConfig}}
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}, \code{\link{CompToxXLStoLocalCSVterms}},
#' \code{\link{CompToxCSVtoLocalCSVterms}}, \code{\link{CompToxFullCSVtoLocalCSVterms}}
#'
#' @export
#'
#' @examples
#' # Example from DOI: 10.1021/acs.est.7b01908
#' # Note that this scores automatically with all metadata fields in the example file, which
#' # is not necessarily ideal as not all predicted values are relevant for ranking the best candidate.
#' CompToxXLS <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2.xls",package="ReSOLUTION")
#' LocalCSVterms <- CompToxXLStoLocalCSVterms(CompToxXLS)
#' peaklist <- system.file("extdata","EQ300804_Nicotine_peaks.txt",package="ReSOLUTION")
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' config_file <- MetFragConfig.CompToxCSV(163.1230, "[M+H]+","Nicotine_PrecMass_MpHp_XLS",peaklist, test_dir, LocalCSVterms)
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#'
#' # Example of Simazine
#' # Note that this uses a CSV file with fewer scoring terms that are more relevant for candidate selection.
#' CompToxFullCSV_test <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
#' LocalCSVterms <- CompToxFullCSVtoLocalCSVterms(CompToxFullCSV_test)
#' peaklist <- system.file("extdata","EA026206_Simazine_peaks.txt",package="ReSOLUTION")
#' rt_file_path <- system.file("extdata","Eawag_rt_inchi.csv",package="ReSOLUTION")
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' MBrecord <- system.file("extdata","EA026206_Simazine.txt",package="ReSOLUTION")
#' MBinfo <- getMBRecordInfo.MetFragConfig(MBrecord,peaklist, writePeaklist=FALSE)
#' IsPosMode <- grepl(MBinfo$ion_mode,"POSITIVE")
#' adduct_type <- MBinfo$prec_type
#' config_file <- MetFragConfig.CompToxCSV(mass=MBinfo$exact_mass,adduct_type = adduct_type,
#' results_filename = paste0("Simazine", "_byExactMass_5ppm"),peaklist_path = MBinfo$peaklist,neutralPrecursorMass=TRUE,
#' base_dir = test_dir, CompToxLocalCSVterms = LocalCSVterms, IsPosMode = IsPosMode,rt_file_path = rt_file_path,
#' rt_exp = MBinfo$ret_time,filter_by_InChIKey = FALSE)
#'
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#'
#'
#' # Example of diclofenac
#' peaklist <- system.file("extdata","EA020161_Diclofenac_peaks.txt",package="ReSOLUTION")
#' MBrecord <- system.file("extdata","EA020161_Diclofenac.txt",package="ReSOLUTION")
#' MBinfo <- getMBRecordInfo.MetFragConfig(MBrecord,peaklist, writePeaklist=FALSE)
#' IsPosMode <- grepl(MBinfo$ion_mode,"POSITIVE")
#' adduct_type <- MBinfo$prec_type
#' config_file <- MetFragConfig.CompToxCSV(mass=MBinfo$prec_mass,adduct_type = adduct_type,
#' results_filename = paste0("Diclofenac", "_byPrecMass_5ppm"),peaklist_path = MBinfo$peaklist,neutralPrecursorMass=FALSE,
#' base_dir = test_dir, CompToxLocalCSVterms = LocalCSVterms, IsPosMode = IsPosMode,rt_file_path = rt_file_path,
#' rt_exp = MBinfo$ret_time,filter_by_InChIKey = FALSE)
#'
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#'
MetFragConfig.CompToxCSV <- function(mass, adduct_type, results_filename, peaklist_path, base_dir, CompToxLocalCSVterms,
                                     output="XLS", ppm=5, mzabs=0.001, frag_ppm=5, IsPosMode=TRUE, tree_depth=2, num_threads=1,
                                     add_refs=FALSE, minInt=0, rt_file_path="",rt_exp=0, suspect_path="", suspect_filter=FALSE,
                                     token="", DB_IDs="",mol_form="",useFormula=FALSE,neutralPrecursorMass=FALSE,
                                     useMoNAMetFusion=TRUE,useMonaIndiv=TRUE,MoNAoffline=TRUE,
                                     incl_el="", excl_el="", incl_exclusive=FALSE,
                                     incl_smarts_filter="",incl_smarts_score="", excl_smarts_filter="",excl_smarts_score="",
                                     filter_isotopes=TRUE,filter_by_InChIKey=TRUE) {
  # Thin wrapper around MetFragConfig: DB, localDB_path, UDS_Category and
  # UDS_Weights are not arguments here. They are fixed to a local CSV database
  # and taken from the CompToxLocalCSVterms list (elements CSV, ScoreTerms,
  # ScoreWeights). Every other argument is handed through unchanged.
  config_path <- MetFragConfig(
    # query and output location
    mass = mass,
    adduct_type = adduct_type,
    results_filename = results_filename,
    peaklist_path = peaklist_path,
    base_dir = base_dir,
    # settings derived from the CompTox terms list
    DB = "LocalCSV",
    localDB_path = CompToxLocalCSVterms$CSV,
    UDS_Category = CompToxLocalCSVterms$ScoreTerms,
    UDS_Weights = CompToxLocalCSVterms$ScoreWeights,
    # everything below is passed straight through
    output = output,
    ppm = ppm,
    mzabs = mzabs,
    frag_ppm = frag_ppm,
    IsPosMode = IsPosMode,
    tree_depth = tree_depth,
    num_threads = num_threads,
    add_refs = add_refs,
    minInt = minInt,
    rt_file_path = rt_file_path,
    rt_exp = rt_exp,
    suspect_path = suspect_path,
    suspect_filter = suspect_filter,
    token = token,
    DB_IDs = DB_IDs,
    mol_form = mol_form,
    useFormula = useFormula,
    neutralPrecursorMass = neutralPrecursorMass,
    useMoNAMetFusion = useMoNAMetFusion,
    useMonaIndiv = useMonaIndiv,
    MoNAoffline = MoNAoffline,
    incl_el = incl_el,
    excl_el = excl_el,
    incl_exclusive = incl_exclusive,
    incl_smarts_filter = incl_smarts_filter,
    incl_smarts_score = incl_smarts_score,
    excl_smarts_filter = excl_smarts_filter,
    excl_smarts_score = excl_smarts_score,
    filter_isotopes = filter_isotopes,
    filter_by_InChIKey = filter_by_InChIKey
  )
  config_path
}
##### Create MetFrag Config Files with CompTox LocalSDF Scoring Terms from MetFrag Export #####
#' Create MetFrag Config Files with LocalSDF and Scoring Terms from CompTox MetFrag SDF Export
#'
#' @description This is a CompTox SDF Export-specific wrapper function for \code{\link{MetFragConfig}}.
#'
#' @usage MetFragConfig.CompToxExportSDF(mass, adduct_type, results_filename, peaklist_path, base_dir,
#' CompToxSDFtoLocalSDFterms, ...)
#'
#' @param mass The mass with which to search the candidate database (\code{DB}). Use \code{neutralPrecursorMass} and
#' \code{adduct_type} to set whether this is monoisotopic mass or an adduct species. By default
#' (\code{neutralPrecursorMass=FALSE}) it is treated as the mass of the \code{adduct_type} species.
#' @param adduct_type The adduct species used to define mass (if \code{neutralPrecursorMass=FALSE}) and fragmentation settings
#' in the config file, entered as either \code{PrecursorIonType} (text) or \code{PrecursorIonmode} (a number). The available
#' options are given in the system file \code{MetFragAdductTypes.csv} in the \code{extdata} folder.
#' Recommended default values (if ion state is unclear) are \code{[M+H]+} (1) for positive and \code{[M-H]-} (-1) for negative mode.
#' @param results_filename Enter a base filename for naming the results files - do not include file endings
#' @param peaklist_path Enter the full path and file name to the peak list for this config file
#' @param base_dir Enter the directory name to set up the subfolders for MetFrag batch results. If the folders don't exist,
#' subfolders \code{config}, \code{log} and \code{results} are created; the output of this function is saved in \code{config}.
#' @param CompToxSDFtoLocalSDFterms The output of \code{\link{CompToxSDFtoLocalSDFterms}}, used to set
#' \code{DB, localDB_path, UDS_Category and UDS_Weights} in \code{\link{MetFragConfig}}
#'
#' @return Returns a MetFrag config file name
#'
#' @details Remaining parameters are described in \code{\link{MetFragConfig}}
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}, \code{\link{CompToxSDFtoLocalSDFterms}}
#'
#' @export
#'
#' @examples
#' # Example from DOI: 10.1021/acs.est.7b01908
#' CompToxSDF <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2.sdf",package="ReSOLUTION")
#' LocalSDFterms <- CompToxSDFtoLocalSDFterms(CompToxSDF)
#' peaklist <- system.file("extdata","EQ300804_Nicotine_peaks.txt",package="ReSOLUTION")
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' config_file <- MetFragConfig.CompToxExportSDF(163.1230, "[M+H]+","Nicotine_PrecMass_MpHp_SDF",peaklist, test_dir, LocalSDFterms)
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#'
MetFragConfig.CompToxExportSDF <- function(mass, adduct_type, results_filename, peaklist_path, base_dir, CompToxSDFtoLocalSDFterms,
                                           output="XLS", ppm=5, mzabs=0.001, frag_ppm=5, IsPosMode=TRUE, tree_depth=2, num_threads=1,
                                           add_refs=FALSE, minInt=0, rt_file_path="",rt_exp=0, suspect_path="", suspect_filter=FALSE,
                                           token="", DB_IDs="",mol_form="",useFormula=FALSE,neutralPrecursorMass=FALSE,
                                           useMoNAMetFusion=TRUE,useMonaIndiv=TRUE,MoNAoffline=TRUE,
                                           incl_el="", excl_el="", incl_exclusive=FALSE,
                                           incl_smarts_filter="",incl_smarts_score="", excl_smarts_filter="",excl_smarts_score="",
                                           filter_isotopes=TRUE,filter_by_InChIKey=TRUE) {
  # Thin wrapper around MetFragConfig: DB, localDB_path, UDS_Category and
  # UDS_Weights are not arguments here. They are fixed to a local SDF database
  # and taken from the CompToxSDFtoLocalSDFterms list (elements SDF,
  # ScoreTerms, ScoreWeights). Every other argument is handed through
  # unchanged.
  config_path <- MetFragConfig(
    # query and output location
    mass = mass,
    adduct_type = adduct_type,
    results_filename = results_filename,
    peaklist_path = peaklist_path,
    base_dir = base_dir,
    # settings derived from the CompTox SDF terms list
    DB = "LocalSDF",
    localDB_path = CompToxSDFtoLocalSDFterms$SDF,
    UDS_Category = CompToxSDFtoLocalSDFterms$ScoreTerms,
    UDS_Weights = CompToxSDFtoLocalSDFterms$ScoreWeights,
    # everything below is passed straight through
    output = output,
    ppm = ppm,
    mzabs = mzabs,
    frag_ppm = frag_ppm,
    IsPosMode = IsPosMode,
    tree_depth = tree_depth,
    num_threads = num_threads,
    add_refs = add_refs,
    minInt = minInt,
    rt_file_path = rt_file_path,
    rt_exp = rt_exp,
    suspect_path = suspect_path,
    suspect_filter = suspect_filter,
    token = token,
    DB_IDs = DB_IDs,
    mol_form = mol_form,
    useFormula = useFormula,
    neutralPrecursorMass = neutralPrecursorMass,
    useMoNAMetFusion = useMoNAMetFusion,
    useMonaIndiv = useMonaIndiv,
    MoNAoffline = MoNAoffline,
    incl_el = incl_el,
    excl_el = excl_el,
    incl_exclusive = incl_exclusive,
    incl_smarts_filter = incl_smarts_filter,
    incl_smarts_score = incl_smarts_score,
    excl_smarts_filter = excl_smarts_filter,
    excl_smarts_score = excl_smarts_score,
    filter_isotopes = filter_isotopes,
    filter_by_InChIKey = filter_by_InChIKey
  )
  config_path
}
# ##### Create MetFrag Config Files with CompTox LocalCSV Scoring Terms from Full CSV Download File #####
# ## Superseded, generalised the CSV function ... no need for specific one... as they use the same localCSVterms
#
# #' Create MetFrag Config Files with LocalCSV and Scoring Terms from CompTox Full CSV Download File
# #'
# #' @description This is a CompTox CSV Download-specific wrapper function for \code{\link{MetFragConfig}}.
# #'
# #' @usage MetFragConfig.CompToxFullCSV(mass, adduct_type, results_filename, peaklist_path, base_dir,
# #' CompToxFullCSVtoLocalCSVterms, ...)
# #'
# #' @param mass The mass with which to search the candidate database (\code{DB}). Use \code{neutralPrecursorMass} and
# #' \code{adduct_type} to set whether this is monoisotopic mass or an adduct species. Defaults to \code{adduct_type}.
# #' @param adduct_type The adduct species used to define mass (if \code{neutralPrecursorMass=FALSE}) and fragmentation settings
# #' in the config file, entered as either \code{PrecursorIonType} (text) or \code{PrecursorIonmode} (a number). The available
# #' options are given in the system file \code{MetFragAdductTypes.csv} in the \code{extdata} folder.
# #' Recommended default values (if ion state is unclear) are \code{[M+H]+} (1) for positive and \code{[M-H]-} (-1) for negative mode.
# #' @param results_filename Enter a base filename for naming the results files - do not include file endings
# #' @param peaklist_path Enter the full path and file name to the peak list for this config file
# #' @param base_dir Enter the directory name to set up the subfolders for MetFrag batch results. If the folders don't exist,
# #' subfolders \code{config}, \code{log} and \code{results} are created; the output of this function is saved in \code{config}.
# #' @param CompToxFullCSVtoLocalCSVterms The output of \code{\link{CompToxFullCSVtoLocalCSVterms}}, used to set
# #' \code{DB, localDB_path, UDS_Category and UDS_Weights} in \code{\link{MetFragConfig}}
# #'
# #' @return Returns a MetFrag config file name
# #'
# #' @details Remaining parameters are described in \code{\link{MetFragConfig}}. PROTOTYPE AT THIS STAGE
# #'
# #' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
# #' Antony J. Williams and team (CompTox Dashboard)
# #'
# #' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}, \code{\link{CompToxFullCSVtoLocalCSVterms}}
# #'
# #' @export
# #'
# #' @examples
# #' # Example of Simazine
# #' CompToxFullCSV_test <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
# #' LocalCSVterms <- CompToxFullCSVtoLocalCSVterms(CompToxFullCSV_test)
# #' peaklist <- system.file("extdata","EA026206_Simazine_peaks.txt",package="ReSOLUTION")
# #' rt_file_path <- system.file("extdata","Eawag_rt_inchi.csv",package="ReSOLUTION")
# #' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
# #' MBrecord <- system.file("extdata","EA026206_Simazine.txt",package="ReSOLUTION")
# #' MBinfo <- getMBRecordInfo.MetFragConfig(MBrecord,peaklist, writePeaklist=FALSE)
# #' IsPosMode <- grepl(MBinfo$ion_mode,"POSITIVE")
# #' adduct_type <- MBinfo$prec_type
# #' config_file <- MetFragConfig.CompToxFullCSV(mass=MBinfo$exact_mass,adduct_type = adduct_type,
# #' results_filename = paste0("Simazine", "_byExactMass_5ppm"),peaklist_path = MBinfo$peaklist,neutralPrecursorMass=TRUE,
# #' base_dir = test_dir, CompToxFullCSVtoLocalCSVterms = LocalCSVterms, IsPosMode = IsPosMode,rt_file_path = rt_file_path,
# #' rt_exp = MBinfo$ret_time,filter_by_InChIKey = FALSE)
# #'
# #' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
# #' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
# #' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
# #'
# #'
# #' # Example of diclofenac
# #' peaklist <- system.file("extdata","EA020161_Diclofenac_peaks.txt",package="ReSOLUTION")
# #' MBrecord <- system.file("extdata","EA020161_Diclofenac.txt",package="ReSOLUTION")
# #' MBinfo <- getMBRecordInfo.MetFragConfig(MBrecord,peaklist, writePeaklist=FALSE)
# #' IsPosMode <- grepl(MBinfo$ion_mode,"POSITIVE")
# #' adduct_type <- MBinfo$prec_type
# #' config_file <- MetFragConfig.CompToxFullCSV(mass=MBinfo$prec_mass,adduct_type = adduct_type,
# #' results_filename = paste0("Diclofenac", "_byPrecMass_5ppm"),peaklist_path = MBinfo$peaklist,neutralPrecursorMass=FALSE,
# #' base_dir = test_dir, CompToxFullCSVtoLocalCSVterms = LocalCSVterms, IsPosMode = IsPosMode,rt_file_path = rt_file_path,
# #' rt_exp = MBinfo$ret_time,filter_by_InChIKey = FALSE)
# #'
# #' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
# #' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
# #' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
# #'
# #'
# #'
# MetFragConfig.CompToxFullCSV <- function(mass, adduct_type, results_filename, peaklist_path, base_dir, CompToxFullCSVtoLocalCSVterms,
# output="XLS", ppm=5, mzabs=0.001, frag_ppm=5, IsPosMode=TRUE, tree_depth=2, num_threads=1,
# add_refs=FALSE, minInt=0, rt_file_path="",rt_exp=0, suspect_path="", suspect_filter=FALSE,
# #DB=c("LocalCSV","LocalSDF"), localDB_path="", UDS_Category="",UDS_Weights=""
# token="", DB_IDs="",mol_form="",useFormula=FALSE,neutralPrecursorMass=FALSE,
# useMoNAMetFusion=TRUE,useMonaIndiv=TRUE,MoNAoffline=TRUE,
# incl_el="", excl_el="", incl_exclusive=FALSE,
# incl_smarts_filter="",incl_smarts_score="", excl_smarts_filter="",excl_smarts_score="",
# filter_isotopes=TRUE,filter_by_InChIKey=TRUE) {
# #define parameters that are missing
# #DB="localCSV"
# #localDB_path=CompToxXLStoLocalCSVterms$CSV
# #UDS_Category=CompToxXLStoLocalCSVterms$ScoreTerms
# #UDS_Weights=CompToxXLStoLocalCSVterms$ScoreWeights
# config_file <- MetFragConfig(mass=mass, adduct_type=adduct_type, results_filename=results_filename, peaklist_path=peaklist_path,
# base_dir=base_dir, DB="LocalCSV", localDB_path = CompToxFullCSVtoLocalCSVterms$CSV,
# UDS_Category=CompToxFullCSVtoLocalCSVterms$ScoreTerms,
# UDS_Weights=CompToxFullCSVtoLocalCSVterms$ScoreWeights,
# output=output, ppm=ppm, mzabs=mzabs, frag_ppm=frag_ppm, IsPosMode=IsPosMode,
# tree_depth=tree_depth, num_threads=num_threads,
# add_refs=add_refs, minInt=minInt, rt_file_path=rt_file_path, rt_exp=rt_exp, suspect_path=suspect_path,
# suspect_filter=suspect_filter, token=token, DB_IDs=DB_IDs,mol_form=mol_form,useFormula=useFormula,
# neutralPrecursorMass=neutralPrecursorMass,
# useMoNAMetFusion=useMoNAMetFusion, useMonaIndiv=useMonaIndiv, MoNAoffline=MoNAoffline,
# incl_el=incl_el, excl_el=excl_el, incl_exclusive=incl_exclusive,
# incl_smarts_filter=incl_smarts_filter, incl_smarts_score=incl_smarts_score,
# excl_smarts_filter=excl_smarts_filter, excl_smarts_score=excl_smarts_score,
# filter_isotopes=filter_isotopes,filter_by_InChIKey=filter_by_InChIKey)
#
# return(config_file)
# }
##### Create MetFrag Config Files with Formula not Mass #####
#' Create MetFrag Config Files with Molecular Formula Candidate Searching
#'
#' @description This is a molecular formula-specific wrapper function for \code{\link{MetFragConfig}}.
#'
#' @usage MetFragConfig.formula(mol_form, adduct_type, IsPosMode, results_filename, peaklist_path, base_dir, ...)
#'
#' @param mol_form The molecular formula (as a string) with which to search the candidate database (\code{DB}).
#' \code{mass} is calculated automatically.
#' @param adduct_type The adduct species used to define mass (if \code{neutralPrecursorMass=FALSE}) and fragmentation settings
#' in the config file, entered as either \code{PrecursorIonType} (text) or \code{PrecursorIonmode} (a number). The available
#' options are given in the system file \code{MetFragAdductTypes.csv} in the \code{extdata} folder.
#' Recommended default values (if ion state is unclear) are \code{[M+H]+} (1) for positive and \code{[M-H]-} (-1) for negative mode.
#' @param IsPosMode Use \code{TRUE} or \code{FALSE} to set positive or negative ionization as appropriate.
#' @param results_filename Enter a base filename for naming the results files - do not include file endings
#' @param peaklist_path Enter the full path and file name to the peak list for this config file
#' @param base_dir Enter the directory name to set up the subfolders for MetFrag batch results. If the folders don't exist,
#' subfolders \code{config}, \code{log} and \code{results} are created; the output of this function is saved in \code{config}.
#'
#' @return Returns a MetFrag config file name
#'
#' @details Remaining parameters are described in \code{\link{MetFragConfig}}
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu> in partnership with Christoph Ruttkies (MetFragCL),
#' Antony J. Williams and team (CompTox Dashboard)
#'
#' @seealso \code{\link{MetFragConfig}}, \code{\link{runMetFrag}}, \code{\link{getAdductMassesFromFormula}}
#'
#' @export
#'
#' @examples
#' # Example of Simazine
#' peaklist_path <- system.file("extdata","EA026206_Simazine_peaks.txt",package="ReSOLUTION")
#' # change this directory to an existing one, or this example won't work
#' test_dir <- "C:/DATA/Workflow/MetFrag22/metfrag_test_results"
#' testCSV <- system.file("extdata","dsstox_MS_Ready_MetFragTestCSV5.csv",package="ReSOLUTION")
#'
#' config_file <- MetFragConfig.formula("C7H12ClN5","[M+H]+",IsPosMode=TRUE, "Simazine_formula_PubChem",peaklist_path, test_dir,
#' DB="PubChem")
#' config_file2 <- MetFragConfig.formula("C7H12ClN5",1,IsPosMode=TRUE, "Simazine_formula_CompTox",peaklist_path,test_dir,
#' DB="LocalCSV",localDB_path=testCSV)
#' config_file3 <- MetFragConfig.formula("C7H12ClN5",1,IsPosMode=TRUE, "Simazine_formula_CompTox_noInChIFilter",peaklist_path,test_dir,
#' DB="LocalCSV",localDB_path=testCSV,filter_by_InChIKey = FALSE)
#'
#' metfrag_dir <- "C:/DATA/Workflow/MetFrag22/"
#' MetFragCL_name <- "MetFrag2.4.4-msready-CL.jar"
#' #note this first query is a longer query using PubChem
#' runMetFrag(config_file, metfrag_dir, MetFragCL_name)
#' runMetFrag(config_file2, metfrag_dir, MetFragCL_name)
#' runMetFrag(config_file3, metfrag_dir, MetFragCL_name)
#'
MetFragConfig.formula <- function(mol_form, adduct_type, IsPosMode, results_filename, peaklist_path, base_dir,
                                  DB=c("KEGG","PubChem","ExtendedPubChem","ChemSpider","FOR-IDENT","MetaCyc",
                                       "LocalCSV","LocalPSV","LocalSDF"), localDB_path="",
                                  output="XLS", ppm=5, mzabs=0.001, frag_ppm=5, tree_depth=2, num_threads=1,
                                  add_refs=FALSE, minInt=0, rt_file_path="",rt_exp=0, suspect_path="", suspect_filter=FALSE,
                                  UDS_Category="",UDS_Weights="", token="", DB_IDs="",neutralPrecursorMass=TRUE,
                                  useMoNAMetFusion=TRUE,useMonaIndiv=TRUE,MoNAoffline=TRUE,
                                  incl_el="", excl_el="", incl_exclusive=FALSE,
                                  incl_smarts_filter="",incl_smarts_score="", excl_smarts_filter="",excl_smarts_score="",
                                  filter_isotopes=TRUE,filter_by_InChIKey=TRUE) {
  # Formula-based wrapper around MetFragConfig: the search mass is computed
  # from mol_form and formula searching is switched on. All remaining
  # arguments are handed through unchanged.

  # hard-coded in this wrapper: the mass derived below is the monoisotopic
  # mass, so whatever the caller supplied for neutralPrecursorMass is replaced
  neutralPrecursorMass <- TRUE
  monoisotopic_mass <- getAdductMassesFromFormula(mol_form)$Monoiso_mass
  config_path <- MetFragConfig(
    # formula-specific settings fixed by this wrapper
    mass = monoisotopic_mass,
    mol_form = mol_form,
    useFormula = TRUE,
    neutralPrecursorMass = neutralPrecursorMass,
    # everything below is passed straight through
    adduct_type = adduct_type,
    results_filename = results_filename,
    peaklist_path = peaklist_path,
    base_dir = base_dir,
    DB = DB,
    localDB_path = localDB_path,
    output = output,
    ppm = ppm,
    mzabs = mzabs,
    frag_ppm = frag_ppm,
    IsPosMode = IsPosMode,
    tree_depth = tree_depth,
    num_threads = num_threads,
    add_refs = add_refs,
    minInt = minInt,
    rt_file_path = rt_file_path,
    rt_exp = rt_exp,
    suspect_path = suspect_path,
    suspect_filter = suspect_filter,
    token = token,
    DB_IDs = DB_IDs,
    UDS_Category = UDS_Category,
    UDS_Weights = UDS_Weights,
    useMoNAMetFusion = useMoNAMetFusion,
    useMonaIndiv = useMonaIndiv,
    MoNAoffline = MoNAoffline,
    incl_el = incl_el,
    excl_el = excl_el,
    incl_exclusive = incl_exclusive,
    incl_smarts_filter = incl_smarts_filter,
    incl_smarts_score = incl_smarts_score,
    excl_smarts_filter = excl_smarts_filter,
    excl_smarts_score = excl_smarts_score,
    filter_isotopes = filter_isotopes,
    filter_by_InChIKey = filter_by_InChIKey
  )
  config_path
}
###### Extract Info from MassBank records for MetFrag Config files #####
#' Extract Info from MassBank records for MetFrag Config files
#'
#' @description This is a convenience wrapper to extract info for MetFrag Config files if source mass spectra
#' are already in MassBank format.
#'
#' @usage getMBRecordInfo.MetFragConfig(MBrecord,peaklist_filepath,writePeaklist=TRUE)
#'
#' @param MBrecord Full path to the MassBank record containing peaks of interest
#' @param peaklist_filepath Full path to the peaklist that will be used in the MetFrag Config file. It is
#' recommended to use a temporary file.
#' @param writePeaklist Default \code{TRUE} to extract and write peaklists to a temporary file. To use an existing
#' file, set to \code{FALSE}.
#'
#' @return Returns a list containing the peaklist filename, molecular formula, exact mass, precursor type,
#' ion mode and retention time
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>
#'
#' @seealso \code{\link{getMBRecordPeaks}}, \code{\link{getMBRecordEntry}}, \code{\link{MetFragConfig}}
#'
#' @export
#'
#' @examples
#'
#' peaklist <- system.file("extdata","EA020161_Diclofenac_peaks.txt",package="ReSOLUTION")
#' MBrecord <- system.file("extdata","EA020161_Diclofenac.txt",package="ReSOLUTION")
#' MBinfo <- getMBRecordInfo.MetFragConfig(MBrecord,peaklist, writePeaklist=FALSE)
#' # use writePeaklist=TRUE to create a peaklist from the MassBank record
#'
getMBRecordInfo.MetFragConfig <- function(MBrecord,peaklist_filepath,writePeaklist=TRUE) {
  # Collects the fields of a MassBank record that are needed to build a
  # MetFrag config file and returns them as a named list.
  #
  # MBrecord          path to the MassBank record file
  # peaklist_filepath path of the peak list file; written only when
  #                   writePeaklist is TRUE, otherwise just echoed back
  # writePeaklist     if TRUE, the peaks returned by getMBRecordPeaks() are
  #                   written to peaklist_filepath (space separated, no
  #                   header, no row names, no quoting)

  # get the data
  if (writePeaklist) {
    # Spelled-out FALSE: the shorthand F is an ordinary, reassignable variable.
    write.table(x = getMBRecordPeaks(MBrecord), file = peaklist_filepath,
                quote = FALSE, row.names = FALSE, col.names = FALSE)
  }
  recVec <- MBFileToVector(MBrecord)
  cmpd_Name <- getMBRecordEntry("CH$NAME:", recVec)[1] # returns the first name only
  cmpd_SMILES <- getMBRecordEntry("CH$SMILES:", recVec)
  cmpd_InChIKey <- getMBRecordEntry("CH$LINK: INCHIKEY", recVec)
  mol_form <- getMBRecordEntry("CH$FORMULA:", recVec)
  exact_mass <- getMBRecordEntry("CH$EXACT_MASS", recVec)
  prec_mass <- getMBRecordEntry("MS$FOCUSED_ION: PRECURSOR_M/Z", recVec)
  prec_type <- getMBRecordEntry("MS$FOCUSED_ION: PRECURSOR_TYPE", recVec)
  ion_mode <- getMBRecordEntry("AC$MASS_SPECTROMETRY: ION_MODE", recVec)

  # The retention time entry is split on single spaces: first token is the
  # value, second token the unit. Look the entry up once and reuse it.
  rt_entry <- getMBRecordEntry("AC$CHROMATOGRAPHY: RETENTION_TIME", recVec)
  if (is.character(rt_entry) && length(rt_entry) > 0) {
    rt_tokens <- strsplit(rt_entry, " ", fixed = TRUE)[[1]]
    ret_time <- rt_tokens[1]
    ret_time_unit <- rt_tokens[2]
  } else {
    # No usable retention time entry (empty, NULL or non-character result):
    # report NA instead of failing inside strsplit()/[[1]].
    ret_time <- NA_character_
    ret_time_unit <- NA_character_
  }

  # export the data
  # Element-wise assignment is kept on purpose: a NULL value leaves the
  # corresponding element out of the list, exactly as before.
  MetFragMBInfo <- list()
  MetFragMBInfo[['source_file']] <- MBrecord
  MetFragMBInfo[['peaklist']] <- peaklist_filepath
  MetFragMBInfo[['cmpd_Name']] <- cmpd_Name
  MetFragMBInfo[['cmpd_SMILES']] <- cmpd_SMILES
  MetFragMBInfo[['cmpd_InChIKey']] <- cmpd_InChIKey
  MetFragMBInfo[['mol_form']] <- mol_form
  MetFragMBInfo[['exact_mass']] <- exact_mass
  MetFragMBInfo[['prec_mass']] <- prec_mass
  MetFragMBInfo[['prec_type']] <- prec_type
  MetFragMBInfo[['ion_mode']] <- ion_mode
  MetFragMBInfo[['ret_time']] <- ret_time
  MetFragMBInfo[['ret_time_unit']] <- ret_time_unit
  return(MetFragMBInfo)
}
##### Fill in missing InChIKeys in CSV files ####
#' Fill in Missing InChIKeys in CSV files
#'
#' @description This is a small wrapper function to patch CSV files that are missing some
#' InChIKey entries, using Cactus then Open Babel to fill gaps from SMILES.
#'
#' @usage addMissingInChIKeys.CSV(csv_file, inchikey_col_number, smiles_col_number, babel_dir, write=TRUE, csv_file_out="")
#'
#' @param csv_file Full path and file name to CSV file to fill in InChIKey gaps
#' @param inchikey_col_number Column number of the column containing InChIKeys to check/fill
#' @param smiles_col_number Column number of the SMILES code needed to calculate missing InChIKeys
#' @param babel_dir Path to the directory containing \code{obabel.exe}, e.g. \code{"C:/Program Files (x86)/OpenBabel-2.3.2"}
#' @param write If \code{TRUE}, writes either to \code{csv_file_out} (if defined) or overwrites \code{csv_file} and
#' returns the file name. If \code{FALSE}, returns a list of InChIKeys and does not write to file.
#' @param csv_file_out Full path and file name of output CSV, define to avoid overwriting the original file.
#'
#' @return Returns path and file name to CSV file with fixed entries (if necessary) or list of InChIKeys.
#'
#' @author Emma Schymanski <emma.schymanski@@uni.lu>
#'
#' @seealso \code{\link{getInChIKey.obabel}}, \code{\link{getCactus}}, \code{\link{InChIKey_test}}
#'
#' @export
#'
#' @examples
#' testCSV <- system.file("extdata","CompToxBatchSearch_MetFrag_MSready_C10H14N2_wSelectMetaData.csv",package="ReSOLUTION")
#' babel_dir <- "C:/Program Files (x86)/OpenBabel-2.3.2"
#' InChIKeys <- addMissingInChIKeys.CSV(testCSV,13,5,babel_dir,write=FALSE)
#'
addMissingInChIKeys.CSV <- function(csv_file, inchikey_col_number, smiles_col_number, babel_dir, write=TRUE, csv_file_out="") {
  # Fills gaps in the InChIKey column of a CSV file from the SMILES column.
  #
  # csv_file            CSV file to read (every column is read as character)
  # inchikey_col_number column holding the InChIKeys to check/fill
  # smiles_col_number   column holding the SMILES used to compute a key
  # babel_dir           passed on to getInChIKey.obabel() for the fallback
  # write               TRUE: write the patched table and return the file
  #                     name; FALSE: return the InChIKey column only
  # csv_file_out        output file; used when longer than 3 characters,
  #                     otherwise csv_file itself is overwritten
  csv_content <- read.csv(csv_file, check.names = FALSE, colClasses = "character")
  InChIKeys <- csv_content[[inchikey_col_number]]

  # find out whether any InChIKeys are missing
  # isTRUE() makes every entry that does not positively pass InChIKey_test()
  # (including an NA result) count as missing.
  key_is_valid <- vapply(InChIKeys, function(key) isTRUE(InChIKey_test(key)),
                         logical(1), USE.NAMES = FALSE)
  MissingInChIKey_index <- which(!key_is_valid)
  csv_file_name <- csv_file

  # The comparison has to sit outside length(): length(index < 1) is simply
  # the number of missing keys, which inverted this test.
  if (length(MissingInChIKey_index) < 1) {
    print("No InChIKeys missing; no changes to file")
  } else {
    for (i in MissingInChIKey_index) {
      SMILES <- csv_content[i, smiles_col_number]
      # First attempt: Cactus. The representation is given as a string.
      # NOTE(review): assumes getCactus(identifier, representation) takes the
      # representation name as character - confirm against its definition.
      InChIKey <- trimKey(getCactus(SMILES, "stdinchikey"))
      if (!isTRUE(InChIKey_test(InChIKey))) {
        # Second attempt: Open Babel.
        InChIKey <- getInChIKey.obabel(SMILES, babel_dir)
      }
      if (isTRUE(InChIKey_test(InChIKey))) {
        csv_content[i, inchikey_col_number] <- InChIKey
      } else {
        print("No InChIKey found using Cactus or OpenBabel")
      }
    }
    # Write (or report) once, after all rows have been processed, rather than
    # rewriting the whole file for every missing row.
    if (write) {
      if (nchar(csv_file_out) > 3) {
        write.csv(csv_content, csv_file_out, row.names = FALSE)
        print("New CSV file written with InChIKeys filled in ")
        csv_file_name <- csv_file_out
      } else {
        write.csv(csv_content, csv_file, row.names = FALSE)
        print("Input CSV file overwritten with InChIKeys filled in")
      }
    } else {
      print("Missing InChIKeys calculated but no output requested; returning InChIKey list")
    }
  }

  if (write) {
    return(csv_file_name)
  } else {
    return(csv_content[[inchikey_col_number]])
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.