R/Functions_extract_Scans.R

Defines functions makeScanlist2 listMS2scans saveScanlist

Documented in listMS2scans makeScanlist2 saveScanlist

#' Parentsearch
#'
#' Make a list of all MS2 scans with the defined parent masses at a given retention time
#' 
#' @param xcmsRaws list of xcmsRaw objects
#' @param mz parent mz values (mumeric vector)
#' @param rt parent retention time (in seconds, numeric vector), needs to be same length as mz
#' @param ppm parent mz tolerance in ppm
#' @param rtw parent rt tolerance in seconds
#' 
#' @return a data.frame with metadata for MS2 scans that match the request
#'
#' @export
Parentsearch <- function (xcmsRaws,
                          mz,
                          rt,
                          ppm = 5,
                          rtw = 200){
  
  fx <- function(rfile, mz, rt, ppm, rtw){
    
    if(length(rfile@msnPrecursorMz) > 0 ){
      sel <- which( abs((rfile@msnPrecursorMz - mz)) < ppm*mz*1e-6
                    &  abs(rfile@msnRt - rt ) < rtw )
      
      if(length(sel) >0){
        scantab <- data.frame(file = rep( basename(rfile@filepath[[1]]) ,length(sel)),
                              acquisition = rfile@msnAcquisitionNum[sel],
                              scan = sel,
                              rt = rfile@msnRt[sel],
                              parentMz = rfile@msnPrecursorMz[sel],
                              parentIntensity = rfile@msnPrecursorIntensity[sel],
                              charge = rfile@msnPrecursorCharge[sel],
                              ppm = 1e6*(rfile@msnPrecursorMz[sel]-mz)/mz,
                              stringsAsFactors = F)
        
        return(scantab)
      }
    }
    
    
    return(NULL)
  }
  res <- list()
  for(i in length(mz)){
    res <- c(res, lapply(xcmsRaws, fx, mz[i], rt[i], ppm, rtw))
  }
  
  return(data.table::rbindlist(res))
}


#' saveScanlist
#'
#' makes a string representation of a MS2 parent list, including only the 
#' basenames and scan # (not acquisition #) of MS2 scans in that list.
#'
#' @param scanlist a scanlist as generated by makeScanlist2 or Parentsearch()
#'
#' @return a character vector
#'
#' @export
saveScanlist <- function(scanlist){
  #TODO: transition to acquisitionNum
  
  if(nrow(scanlist) <1){
    return("")
  }
  
  return(paste(sapply(split(scanlist, scanlist$file), function(x){paste0(x$file[1],":",paste(x$scan, collapse = " "))}), collapse = "|"))
  
}

#' listMS2scans
#'
#' Searches for MS2 scans with a given parent m/z and directly converts the
#'  resulting table into a single string
#'
#' Also, see \code{\link{Parentsearch}}
#' 
#' @param mz expected target (parent) m/z
#' @param rt expected target (parent) retention time in seconds
#' @param ppm target (parent) m/z tolerance in ppm
#' @param rtw target (parent) rt tolerance in seconds
#' @param MSData list of xcmsRaw objects
#' @param rtMatch assign MS2 scans only to the matching feature with the closest rt
#' 
#' @return a character vector
#'
#' @export
listMS2scans <- function(mz,rt,ppm,rtw,MSData, rtMatch = F){
  
  step1 <- mapply(function(mz,rt,ppm,rtw,MSData){
    Parentsearch(MSData, mz, rt, ppm, rtw)
    
  }, mz, rt, MoreArgs = list(ppm = ppm, rtw = rtw, MSData = MSData), SIMPLIFY = F
  
  )
  
  if(rtMatch){
    step1 <- mapply(function(df,num){
      if(!is.null(df) && nrow(df) > 0){
        #df <- as.data.frame(df, stringsAsFactors = F)
        df$tempgroup <- num
      }
      return(df)
    } ,step1, seq(length(step1)), SIMPLIFY = F)
    
    bound <- rbindlist(step1)
    
    bound$dupfind <- paste0(bound$file,"#",bound$acquisition)
    bound$deltart <- abs(bound$rt - rt[bound$tempgroup])
    
    spl <- split(bound, bound$dupfind)
    
    spl <- lapply(spl, function(df){
      return(df[which.min(df$deltart),])
    })
    
    bound <- rbindlist(spl)
    
    step1 <- lapply(seq(length(step1)), function(n, tab){
      fin <- tab$tempgroup == n
      if(!any(fin)){return(data.table::data.table(NULL))}
      tab[fin,colnames(tab)[!colnames(tab) %in% c("tempgroup", "dupfind", "deltart")], with = F]
      
    }, bound)
    
    
  }
  
  
  
  step2 <- sapply( step1, saveScanlist)
  
  return(
    step2
  )
  
}

#' makeScanlist2
#' 
#' Make a scan list
#' 
#' @param splitme character string with format 
#' filename:scannumber scannumber|filename: scannumber... as generated 
#' by saveScanlist()
#' @param MSData list of xcmsRaw objects. Only scans from files loaded in 
#' this object will be returned
#' @param MS2 if TRUE, looks for MS2 scans
#' 
#' @return a data.frame with metadata for MS2 scans as specified by \code{splitme}
#' @importFrom data.table data.table
#' 
#' @export
makeScanlist2 <- function(splitme, MSData = NULL, MS2 = T){
  
  split1 <-  strsplit(splitme, "\\|")
  
  split2 <-  lapply(split1, strsplit, "\\:")
  
  split3 <- lapply(split2, function(x){ lapply(x, function(y){ scs <- strsplit(y[2], " ")[[1]]
  return(data.table(file = rep(y[1], length(scs)),
                    scan = as.integer(scs)))}) })
  #now consolidate into data.tables
  split4 <- lapply(split3, data.table::rbindlist)
  
  if(!is.null(MSData)){
    
    split4 <- lapply(split4, function(scantab, MSData, MS2){
      
      if(any(basename(names(MSData)) %in%  scantab$file)){
        
        scantab <- as.data.frame(scantab, stringsAsFactors = F)
        
        scantab$acquisition <- numeric(nrow(scantab))
        scantab$rt <- numeric(nrow(scantab))
        scantab$parentMz <- numeric(nrow(scantab))
        scantab$parentIntensity <- numeric(nrow(scantab))
        #  print(scantab)
        
        for(i in unique(scantab$file)){
          
          rawsel <- which(basename(names(MSData)) ==  i)
          if(length(rawsel) > 0){
            rawsel <- rawsel[1]
            
            tabsel <- scantab$file == i
            #   print(rawsel)
            
            scantab$acquisition[tabsel] <-  MSData[[rawsel]]@msnAcquisitionNum[scantab$scan[tabsel]]
            scantab$rt[tabsel] <-  MSData[[rawsel]]@msnRt[scantab$scan[tabsel]]
            scantab$parentMz[tabsel] <-  MSData[[rawsel]]@msnPrecursorMz[scantab$scan[tabsel]]
            scantab$parentIntensity[tabsel] <-  MSData[[rawsel]]@msnPrecursorIntensity[scantab$scan[tabsel]]
            
          }
        }
        
      }
      
      
      return(scantab)
    }, MSData, MS2)
    
  }
  
  return(split4)
  
}
mjhelf/Metaboseek documentation built on April 23, 2022, 12:09 p.m.