R/metaboWeizFunctions.R

#!/usr/bin/Rscript
# matchWeiz.R version="0.1"

options(stringsAsFactors = FALSE)

#'@import Rdisop
#'@importFrom fdrtool phalfnorm
#'@importFrom fdrtool sd2theta
#'@importFrom CHNOSZ makeup 
#'@importFrom CHNOSZ as.chemical.formula

run_RT_module = function (plList,polarity,mixType,project,massTol,confint=0.99) {

  print("=========================")
  print("Run RT correction module:")
  print("=========================")  
  
  data (rtReftables)
  refTable = rtReftables[[polarity]]

  rt.shift.model = rtModule(plList,polarity,refTable,project,massTol)
  
  if (!all(is.na(rt.shift.model))) {
    save (	
      rt.shift.model,
      file=paste (mixType,project,polarity,Sys.Date(),"Rdata",sep=".")		
    )
    print ("Saved RT shift model:")
    print (paste(mixType,project,polarity,Sys.Date(),"Rdata",sep="."))
  } else {
    print ("No RT matches - please check data manually!")
    stop ("Error - RT model cannot be built")
  }
  print ("**** RT Module - OK ****")
  return (rt.shift.model)
}
#' # Run matching and get annotations in a single ionization mode#' 
#' Match input LC-MS features to the given MS library.
#' 
#' The 'matchWeiz' multi-modular search method is applied to match an input of biologically derived sample data to the 
#' given MS library. Retention time (RT) correction is first applied to the input peak list using a group of peak lists
#' of predefined mixes of chemical standards (see \code{stdMix} data description). Several constant variables are used 
#' except the input arguments, including exact mass values for possible adducts and the peak search parameter 
#' \emph{GROUP_TH} which sets the minimum number of matches required to annotate a group of features. These constants 
#' can be viewed and edited by loading the data object: \code{data("mmSettings")}. 
#' The output of the function is an R object containing all annotation data, which then needs to be converted to a 
#' readable text format using the function \code{summarizeMWresults}. Please note that two MS channels (low/high) need
#' to be available in the peak lists as data columns named with the suffix '01' and '02', respectively (e.g.: 'IL_140816_6701'
#'  and 'IL_140816_6702' would describe data of the same biological sample, but originating from channels Ms1 and Ms2).
#' 
#' @param cameraPeaklist Path to a tab delimited peak list file, as generated by the CAMERA package.
#' @param rtCorrFiles Path to a directory containing peak list files corresponding with a predefined mix of chemical standards (see below).
#' @param polarity The MS ionization mode (either "negative" or "positive") .
#' @param project Any descriptive name for the current search run.
#' @param MSlib MS library R object as generated by the \code{buildMSlibrary} function.
#' @param massTol The mass tolarance for mass-to-mass matches, in ppm (default is 20 ppm)
#' @return NULL on a succesful completion.
#' @examples\donttest{
#' data(sampleLib)
#' tomPL_neg <- system.file("extdata/Tom_all_negative_2ch_xan.tsv",package="matchWeiz")
#' rtFiles_neg <- system.file("extdata/rt_files_neg",package="matchWeiz")
#' runMatch (tomPL_neg,rtFiles_neg,"negative","tom_test",MSlib.neg) 
#' }
#' @export
runMatch = function(
  cameraPeaklist,   
  rtCorrFiles,
  polarity, 
  project,
  MSlib,
  massTol = 20
) {    
  
  # RT correction mix type - at the moment this is the single option used.
  mixType ="stdMix"
  
  set.seed(Sys.Date())
  
  require(Rdisop)
  data("mmSettings")
  
  ## 	Read sample peak list
  print ("Reading peak list file:")
  print (cameraPeaklist)
  tryCatch({
    pl = read.delim(cameraPeaklist)
  }, error=function(err) {
    stop("failed to read input sample peak list file: ", conditionMessage(err))
  })
  intCol.samp1 = min (grep(".*0[1|2]$", names(pl)))
  intCol.samp2 = max (grep(".*0[1|2]$", names(pl)))  
  # parse short sample name
  plName = strsplit(basename(cameraPeaklist),split="\\.(txt|csv|tsv)")[[1]][1]
  # remove NAs (in case fill-peaks hasn't been done)
  pl[is.na(pl[,intCol.samp1:intCol.samp2]),intCol.samp1:intCol.samp2] = abs(jitter(0))
  # compute a non-adaptive mass tolerance  values (Da.)
  pl$massTol = round(massTol*pl$mz/1e6,4)

  print ("Reading peak list file:")
  print ("rtCorrFiles")
  tryCatch({
      plList = lapply(dir(rtCorrFiles,full.names=TRUE),read.delim)
    #    plList = lapply(rtCorrFiles,read.delim)
  }, error=function(err) {
    stop("failed to read stdMix peak list file: ", conditionMessage(err))
  })
  ##
  ##  Create the RT correction model - using the rt correction module (according to previously defined model)
  ##
  rtModel = run_RT_module(plList,polarity,mixType,project,massTol)
  coeff = coef (rtModel$model)
  # Compute shifted RT 
  pl$rtShifted = pl$rt*1/coeff[2] - coeff[1]
  # Bound lower shifted RT values to minimum RT
  pl$rtShifted[pl$rtShifted<min(pl$rt)] = min(pl$rt)
  # Bound higher shifted RT values to maximum RT
  pl$rtShifted[pl$rtShifted>max(pl$rt)] = max(pl$rt)
  # Get RT search toerance from model
  rtTol = round(rtModel$CI,1) 
  ##
  ##	Do innitial  mz-rt annotation - use 'annotate feature' function
  ##	- which returns a DB peak match(es) for each input peak according
  ##	to mz-rt match, or otherwise NULL.
  ##
  print ("Annotating individual features:")
  print (paste("RT tolerance:",rtTol,"s."))
  
  peak.annot = apply (pl[c("mz","rtShifted","massTol")],1, function(peak) {
    annotateFeature(peak,polarity,MSlib,rtTol) 
  })  
  print (paste("Matched",sum(!sapply(peak.annot,is.null)),"peaks"))
  ##
  ##	Multiple DB peak matches according to DB ids are grouped and returned in the 
  ##	slot 'grouped'. Unmatched or single-peak groups are kept in a slot: 'unmatched' 		
  ##
  if (length(peak.annot)) { 
    print ("Annotating peak groups:")
    idList = getIds(peak.annot,MSlib,pl,intCol.samp1,intCol.samp2,polarity,rtModel)
  } else {
    stop ("\nNo library matches found - please check your input data (correct ionization mode?)")
  }	
  save (idList, file=paste(project,polarity,"idList.RData",sep="_"))
  print ("Done")
  return (NULL)
}
AsaphA/matchWeiz documentation built on May 5, 2019, 8:12 a.m.