directories/parameters
# top level project into subdirectories of which outputs can be written projDir <- 'full_path_to_working_directory' # MS1 level profiling data mzXml/mzML/mgf directory ms1DataDir <- 'full_path_to_ms1Data_directory' # MS2 level profiling data mzXml/mzML/mgf directory ms2DataDir <- 'full_path_to_ms2Data_directory' # ionization mode ionMode <- 'pos/neg' # sample string (optional pre-processing with MetMSLine) sampStr <- 'sample' # blank string (optional pre-processing with MetMSLine) blankStr <- 'blank' # QC string (optional pre-processing with MetMSLine) qcStr <- 'QC' # number of cores for parallel # use all cores default nCores <- parallel::detectCores()
generate subdirectories in project folder
dir.create(paste0(projDir, '/output')) dir.create(paste0(projDir, '/R')) dir.create(paste0(projDir, '/R/rDataFiles')) dir.create(paste0(projDir, '/data')) dir.create(paste0(projDir, '/doc'))
Load packages
# required packages suppressWarnings(suppressPackageStartupMessages(library(xcms))) suppressPackageStartupMessages(library(snow)) suppressPackageStartupMessages(library(CAMERA)) suppressPackageStartupMessages(library(compMS2Miner)) suppressPackageStartupMessages(library(MetMSLine))
setwd(ms1DataDir) peakmatrix <- xcmsSet(method="centWave", peakwidth=c(2, 20), ppm=10, snthresh=5, nSlaves=nCores, verbose.columns=TRUE) ret <- retcor(peakmatrix, method="obiwarp", plottype=c("deviation")) gret <- group(ret, bw=2, mzwid=0.015, minfrac=0.25) fill <- fillPeaks.chrom(gret, nSlaves=nCores)
# taken from Beyond Profiling Stanstrup et. al. # Select which rules to use rules.jan <- read.csv('full_path_to_CAMERA_rules', header=TRUE, stringsAsFactors = FALSE) #Create an xsAnnotate object xsa <- xsAnnotate(fill, polarity = ifelse(ionMode == 'neg', 'negative', 'positive')) #Group after RT value of the xcms grouped peak xsaF <- groupFWHM(xsa, perfwhm=1) #Verify grouping #calcCiS just because then we don't the raw data xsaC <- groupCorr(xsaF,graphMethod='lpc', pval=0.000001, calcCaS = TRUE, cor_exp_th=0.7, cor_eic_th=0.7,calcCiS = FALSE, calcIso = FALSE) #Annotate isotopes xsaFI <- findIsotopes(xsaC, ppm=10, mzabs=0.01, intval="into") #Annotate adducts and neutral losses xsaFA <- findAdducts(xsaFI, polarity=ifelse(ionMode == 'neg', 'negative', 'positive'), ppm=10, mzabs=0.01, multiplier=4, rules=rules.jan) #Generate result peakTable <- getPeaklist(xsaFA) # rearrange columns names(peakTable)[names(peakTable) == 'mz'] <- 'mzmed' names(peakTable)[names(peakTable) == 'rt'] <- 'rtmed' peakTable <- cbind(1:nrow(peakTable), peakTable) colnames(peakTable)[1] <- 'EICno' colIndxTmp <- grep("EICno|mzmed|rtmed|adduct", colnames(peakTable)) peakTable <- peakTable[, c(colIndxTmp, setdiff(1:ncol(peakTable), colIndxTmp))] peakTable[, c(1:3)] <- apply(peakTable[, 1:3], 2, as.numeric) message('total LC-MS features peakTable: ', nrow(peakTable), '\n') # save results setwd(projDir) write.csv(peakTable, file="full_path_to_peak_table_output_csv", row.names=FALSE)
# observation names obsNames <- colnames(peakTable)[grep(sampStr, colnames(peakTable))] blanks <- colnames(peakTable)[grep(blankStr, colnames(peakTable))] qcs <- colnames(peakTable)[grep(qcStr, colnames(peakTable))] # zero fill peakTable <- MetMSLine::zeroFill(peakTable, c(obsNames, blanks)) # blank fold change greater than 2 peakTable <- MetMSLine::blankSub(peakTable, blankNames=blanks, sampNames=obsNames, thresh=2) # cv calculation using QC samples (retain features >= 30 %) peakTable <- MetMSLine::cvCalc(peakTable, qcs, thresh=30) # log transform peakTable <- MetMSLine::logTrans(peakTable, c(obsNames, blanks))
# create compMS2 object compMS2_object <- compMS2Construct(MS1features=peakTable, msDataDir=ms2DataDir, nCores=nCores, mode = ionMode, precursorPpm = 10, ret = 20, TICfilter = 10000, verbose=FALSE) compMS2_object
compMS2_object <- deconvNoise(compMS2_object, "DNF", verbose=FALSE) compMS2_object
compMS2_object <- combineMS2(compMS2_object, "Ions", verbose=FALSE) compMS2_object
compMS2_object <- combineMS2(compMS2_object, "Spectra", mzError=0.01, specSimFilter=0.8, verbose=FALSE) compMS2_object
compMS2_object <- combineMS2(compMS2_object, 'removeContam', nContams=6, maxRtGap=Inf, remContam=TRUE) compMS2_object
compMS2_object <- subStructure(compMS2_object, 'Annotate', Frag_mzabs=0.05) # identify most probable substructure annotation based on total relative intensity # explained compMS2_object <- subStructure(compMS2_object, "prob") # summary of most probable substructure annotation mostProbSubStr <- subStructure(compMS2_object, "probSummary")
If smiles codes are not detected then they are calculated from InChI (more commonly available in MoNa) using OpenBabel.
compMS2_object <- metID(compMS2_object, 'matchSpectralDB', mspFile='full_path_to_msp_file', minDBDotProdThresh=0.65, verbose=FALSE)
False positives are removed based on summed average word frequency of abstracts returned from PubMed and cleaned. Erroneous drugs, pesticides and antibiotics can be identified and removed and/or flagged by this method.
compMS2_object <- falsePosIdentify(compMS2_object, removeFP=TRUE, prohibKeyWords="drug|pesticide|antibiotic|placebo", meanFreqPerAbs=0.6, verbose=FALSE)
taking in to account cluster ions identified by the CAMERA package and included in the 4th column of the MS1feature table and additional mandatory common electrospray adducts.
# substructure types # none subStrMassShift <- c(42.010565, 119.004101, 176.03209, 255.988909, 305.068159, 57.021464, 161.014666, 79.956817) names(subStrMassShift) <- c("acetyl", "cysteine", "glucuronide", "glucuronide sulfate", "glutathione", "glycine", "mercapturate", "sulfate") # common mandatory esi adducts i.e. added to those detected in the MS1 data by CAMERA if(ionMode == 'neg'){ mandEsiAdducts <- c('[M-H2O-H]-', '[M+Na-2H]-', '[M+Cl]-', '[M+K-2H]-', '[M-H+HCOOH]-', '[M-H+CH3COOH+Na]-', '[M-H+CH3COOH]-') } else { mandEsiAdducts <- c('[M+NH4]+', '[M+H3O]+', '[M+Na]+','[M+CH3OH+H]+', '[M+CH3COONa]+', '[M+K]+', '[M+2Na-H]+') } # what if any substructure detected? subStrMassShift <- subStrMassShift[names(subStrMassShift) %in% names(mostProbSubStr)] # internal database annotation compMS2_object <- metID(compMS2_object, "dbAnnotate", SubStrs=subStrMassShift, metDB=HMDB, includeElements=c('C', 'H', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I'), esiAdducts=mandEsiAdducts, MS1adducts=TRUE) compMS2_object <- metID(compMS2_object, "dbAnnotate", SubStrs=subStrMassShift, metDB=LMSD, includeElements=c('C', 'H', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I'), esiAdducts=mandEsiAdducts, MS1adducts=TRUE)
compMS2_object <- metID(compMS2_object, "dbProb", minTimesId=1, verbose=FALSE)
# predict any phase II metabolite structures compMS2_object <- metID(compMS2_object, 'predSMILES')
compMS2_object <- metID(compMS2_object, 'rtPred', verbose=FALSE)
# in silico compMS2_object <- metID(compMS2_object, "metFrag", autoPossId=TRUE, possContam=6, verbose=FALSE) compMS2_object <- metID(compMS2_object, "CFM", autoPossId=TRUE, possContam=6, verbose=FALSE)
optimum correlation and spectral similarity cut-offs are estimated using the network density plateau miminum. If no plateau minima is found then the median value of the downward slope is returned.
# correlation network compMS2_object <- metID(compMS2_object, method='corrNetwork', peakTable, obsNames, corrMethod='pearson', corrThresh=NULL, MTC='none', MS2only=3, maxNodes=7000, minClustSize=3) # spectral similarity compMS2_object <- metID(compMS2_object, method='specSimNetwork', minDotProdThresh=NULL, minClustSize=3)
compMS2_object <- metID(compMS2_object, 'chemSim', autoPossId=TRUE, possContam=6, minEdges=2, minSimScore=0.7, verbose=FALSE)
compMS2_object <- metID(compMS2_object, 'buildConsensus', include=c('massAccuracy', 'inSilico', 'chemSim', 'rtPred', 'substructure', 'pubMed'), autoPossId=TRUE, possContam=6, minMeanBCscore=0.1, verbose=FALSE)
tpSumBC <- trueFalseSum(compMS2_object, verbose=FALSE)
False positives are identified based on summed average word frequency of abstracts returned from PubMed and cleaned. Erroneous drugs, pesticides and antibiotics can be identified and removed and/or flagged by this method.
compMS2_object <- falsePosIdentify(compMS2_object, removeFP=FALSE, prohibKeyWords="drug|pesticide|antibiotic|placebo", meanFreqPerAbs=0.6, verbose=FALSE)
compMS2_object <- annoCompoundClass(compMS2_object, overWrite=TRUE, minSimScore=0.8, verbose=FALSE)
compMS2_object <- compMS2Explorer(compMS2_object)
appName <- paste0('compMS2_object_', toupper(ionMode)) publishApp(compMS2_object, appName=appName, writeDir=paste0(projDir, 'output/'), addFiles=c(paste0(projDir, 'R/', 'compMS2_workflow.pdf')))
# full path to zip file written by publishApp function compMS2_object <- compMS2Explorer(paste0(projDir, 'output/', appName,'/', appName, '.zip'))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.