R/annotationFromPeakTable.R

Defines functions annotationFromPeakTable

Documented in annotationFromPeakTable

#' Title annotationFromPeakTable
#'
#' @param peakTable Peak table containing m/z and tr.
#' @param mgfFile The MGF file containing MS/MS information.
#' @param database The database used for identification.
#' @param ionMode Positive ion mode is 'P', negative ion mode is 'N'.
#' @param CE Collision energy,default value is 'all'.
#' @param tRCalibration T or F, T will do retention time calibration and F will not.
#' @param is.tR.file The retention time of internal standards which saved in a xlsx file.
#' @param MS1DeltaMZ Delta m/z of MS1.
#' @param MS1DeltaTR Delta retention time.
#' @param MS2.sn.threshold The S/N threshold of MS2.
#' @param MS2.noise.intensity The intensity of noise of MS2 spectrum.
#' @param MS2.missing.value.padding The MS2 missing value padding method, "half" or "minimal.value"
#' @param ms2Mode MS2 acquisition mode, 'ida' or 'dia', the default value is 'ida'
#' @param diaMethod If MS2 acquisition mode is "dia", a file save dia method should be provided.
#' @param MS1MS2DeltaTR Delta retention time between MS1 and MS2.
#' @param MS1MS2DeltaMZ Delta m/z between MS1 and MS2.
#' @param MS2DeltaMZ Delta m/z between experimental MS2 and reference MS2.
#' @param scoreMode The MS2 score mode, default is "average".
#'
#' @return annotationFromPeakTableRes
#' @export annotationFromPeakTable
#' @importFrom tcltk tkProgressBar setTkProgressBar
#' @importFrom openxlsx write.xlsx
#' @importFrom stats na.omit
#' @importFrom utils read.csv
#'
#' @examples
#' annotationFromPeakTableRes <- annotationFromPeakTable(
#'     peakTable = system.file("extdata/peakTable","example.csv", package = "MetEx"),
#'     mgfFile = system.file("extdata/mgf","example.mgf", package = "MetEx"),
#'     database = system.file("extdata/database","example_database.xlsx", package = "MetEx"),
#'     ionMode = "P",
#'     MS1DeltaMZ = 0.01,
#'     MS1DeltaTR = 120,
#'     MS1MS2DeltaTR = 5,
#'     MS1MS2DeltaMZ = 0.01,
#'     MS2DeltaMZ = 0.02)
annotationFromPeakTable <- function(peakTable,
                                    mgfFile,
                                    database,
                                    ionMode,
                                    CE = 'all',
                                    tRCalibration = F,
                                    is.tR.file = NA,
                                    MS1DeltaMZ,
                                    MS1DeltaTR,
                                    MS2.sn.threshold = 3,
                                    MS2.noise.intensity = "minimum",
                                    MS2.missing.value.padding = "half",
                                    ms2Mode = 'ida',
                                    diaMethod = 'NA',
                                    MS1MS2DeltaTR,
                                    MS1MS2DeltaMZ,
                                    MS2DeltaMZ,
                                    scoreMode = 'average'){

  MS1RawData <- read.csv(peakTable)
  if (length(which(colnames(MS1RawData)=='tr')) >= 1){
    packageStartupMessage('Row names of MS1 file is Available.')
  } else if (length(which(colnames(MS1RawData)=='tr')) == 0 & length(which(colnames(MS1RawData)=='rt')) >= 1){
    colnames(MS1RawData)[which(colnames(MS1RawData)=='rt')] = 'tr'
    packageStartupMessage('Change rt to tr in row names of MS1.')
  } else {
    packageStartupMessage('Row names of MS1 file is wrong!')
  }
  MSMS.Exp <- vector(mode="character",length = nrow(MS1RawData))
  MSMS.DB <- vector(mode="character",length = nrow(MS1RawData))
  DP <- vector(mode="character",length = nrow(MS1RawData))
  RDP <- vector(mode="character",length = nrow(MS1RawData))
  frag.ratio <- vector(mode="character",length = nrow(MS1RawData))
  score <- vector(mode="character",length = nrow(MS1RawData))
  mz.DB <- vector(mode="character",length = nrow(MS1RawData))
  tr.DB <- vector(mode="character",length = nrow(MS1RawData))
  ID.DB <- vector(mode="character",length = nrow(MS1RawData))
  name <- vector(mode="character",length = nrow(MS1RawData))
  SMILES <- vector(mode="character",length = nrow(MS1RawData))
  Adduct <- vector(mode="character",length = nrow(MS1RawData))
  CE.DB <- vector(mode="character",length = nrow(MS1RawData))

  mgfList <- importMgf(mgfFile)
  mgfMatrix <- mgfList$mgfMatrix
  mgfData <- mgfList$mgfData
  mzinmgf <- as.matrix(mgfMatrix[ , 'pepmassNum'])
  trinmgf <- as.matrix(mgfMatrix[ , 'trNum'])
  dbData <- dbImporter(dbFile=database, ionMode = ionMode, CE = CE)
  if (tRCalibration == T){
    dbData <- retentionTimeCalibration(is.tR.file = is.tR.file, database.df = dbData)
  }

  pb <- tkProgressBar("annotationFromPeakTable","Rate of progress %", 0, 100)
  for (i in c(1:nrow(MS1RawData))){
    info<- sprintf("Rate of progress %d%%", round(i*100/nrow(MS1RawData)))
    setTkProgressBar(pb, i*100/nrow(MS1RawData), sprintf("annotationFromPeakTable (%s)", info),info)

    mz.MS1.i <- MS1RawData$mz[i]
    tr.MS1.i <- MS1RawData$tr[i]
    ms2ActInRaw <- ms1ms2Match(mz = mz.MS1.i,
                              tr = tr.MS1.i,
                              MS1MS2DeltaMZ,
                              deltaTR = MS1MS2DeltaTR,
                              mgfMatrix,
                              mgfData,
                              ms2Mode,
                              diaMethod)
    match.posi <- which(abs(dbData$`m/z` - mz.MS1.i) < MS1DeltaMZ & abs(dbData$tr - tr.MS1.i) < MS1DeltaTR)

    if (length(ms2ActInRaw) == 0 & length(match.posi) == 0){
      DP[i] <- NA
      RDP[i] <- NA
      frag.ratio[i] <- NA
      score[i] <- NA
      MSMS.Exp[i] <- NA
      MSMS.DB[i] <- NA
      mz.DB[i] <- NA
      tr.DB[i] <- NA
      ID.DB[i] <- NA
      name[i] <- NA
      SMILES[i] <- NA
      Adduct[i] <- NA
      CE.DB[i] <- NA
    }
    else if (length(ms2ActInRaw) == 0 & length(match.posi) != 0){
      order.num <- order(abs(dbData$`m/z`[match.posi]-mz.MS1.i)/MS1DeltaMZ * 0.6 + abs(dbData$tr[match.posi]-tr.MS1.i)/MS1DeltaTR * 0.4, decreasing = F)
      DP[i] <- NA
      RDP[i] <- NA
      frag.ratio[i] <- NA
      score[i] <- NA
      MSMS.Exp[i] <- "Can't find MS2"
      MSMS.DB[i] <- as.character(paste(dbData$MSMS[match.posi[order.num]],collapse = " * "))
      mz.DB[i] <- as.character(paste(dbData$`m/z`[match.posi[order.num]],collapse = " * "))
      tr.DB[i] <- as.character(paste(dbData$tr[match.posi[order.num]],collapse = " * "))
      ID.DB[i] <- as.character(paste(dbData$ID.DB[match.posi[order.num]],collapse = " * "))
      name[i] <- as.character(paste(dbData$Name[match.posi[order.num]],collapse = " * "))
      SMILES[i] <- as.character(paste(dbData$SMILES[match.posi[order.num]],collapse = " * "))
      Adduct[i] <- as.character(paste(dbData$LCMS1_AdductName[match.posi[order.num]],collapse = " * "))
      CE.DB[i] <- as.character(paste(dbData$CE[match.posi[order.num]],collapse = " * "))
    }
    else if (length(ms2ActInRaw) != 0 & length(match.posi) == 0){
      DP[i] <- NA
      RDP[i] <- NA
      frag.ratio[i] <- NA
      score[i] <- NA
      ms2ActInRaw.temp <- vector(mode="character",length = length(ms2ActInRaw))
      for (ms2ActInRaw.i in c(1:length(ms2ActInRaw))){
        ms2ActInRaw.temp[ms2ActInRaw.i] <- paste(ms2ActInRaw[[ms2ActInRaw.i]], collapse = ";")
      }
      MSMS.Exp[i] <- paste(ms2ActInRaw.temp, collapse = " * ")
      MSMS.DB[i] <- NA
      mz.DB[i] <- NA
      tr.DB[i] <- NA
      ID.DB[i] <- NA
      name[i] <- NA
      SMILES[i] <- NA
      Adduct[i] <- NA
      CE.DB[i] <- NA
    }
    else if (length(ms2ActInRaw) != 0 & length(match.posi) != 0){

      candidateScore <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidateDP <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidateRDP <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidatefrag.ratio <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidateMSMS.DB <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidateMSMS.Act <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.mz.DB <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.tr.DB <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.ID.DB <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.name <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.SMILES <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.Adduct <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))
      candidate.CE.DB <- matrix(NA, nrow = length(ms2ActInRaw), ncol = length(match.posi))

      for (k in c(1:length(ms2ActInRaw))){
        ms2Act <- ms2ActInRaw[[k]]
        ms2Act <- strsplit(ms2Act, " ", fixed=TRUE)
        ms2Act <- list2dataframe(ms2Act)
        ms2Act <- na.omit(ms2Act)
        ms2Act <- ms2Act[which(ms2Act[,1] < (mz.MS1.i+MS2DeltaMZ)),]

        for (j in c(1:length(match.posi))){
          ms2DB <- as.character(dbData$MSMS[match.posi[j]])
          ms2DB <- strsplit(ms2DB, ";", fixed=TRUE)
          ms2DB <- strsplit(unlist(ms2DB), " ", fixed=TRUE)
          ms2DB <- list2dataframe(ms2DB)
          ms2DB <- na.omit(ms2DB)
          ms2DB <- ms2DB[which(ms2DB[,1] < (ms2DB+MS2DeltaMZ)),]
          if (nrow(ms2Act)==0){
            candidateScore[k,j] <- NA
            candidateDP[k,j] <- NA
            candidateRDP[k,j] <- NA
            candidatefrag.ratio[k,j] <- NA
            candidateMSMS.DB[k,j] <- dbData$MSMS[match.posi[j]]
            candidateMSMS.Act[k,j] <- "Can't find MS2"
            candidate.mz.DB[k,j] <- dbData$`m/z`[match.posi[j]]
            candidate.tr.DB[k,j] <- dbData$tr[match.posi[j]]
            candidate.ID.DB[k,j] <- dbData$ID.DB[match.posi[j]]
            candidate.name[k,j] <- dbData$Name[match.posi[j]]
            candidate.SMILES[k,j] <- dbData$SMILES[match.posi[j]]
            candidate.Adduct[k,j] <- dbData$LCMS1_AdductName[match.posi[j]]
            candidate.CE.DB[k,j] <- dbData$CE[match.posi[j]]
          }else{
            candidateScore[k,j] <- ms2Score(ms2Act, ms2DB, MS2DeltaMZ, sn.threshold = MS2.sn.threshold, noise.intensity = MS2.noise.intensity, missing.value.padding = MS2.missing.value.padding, scoreMode)
            candidateDP[k,j] <- ms2Score(ms2Act, ms2DB, MS2DeltaMZ, sn.threshold = MS2.sn.threshold, noise.intensity = MS2.noise.intensity, missing.value.padding = MS2.missing.value.padding, scoreMode = "obverse")
            candidateRDP[k,j] <- ms2Score(ms2Act, ms2DB, MS2DeltaMZ, sn.threshold = MS2.sn.threshold, noise.intensity = MS2.noise.intensity, missing.value.padding = MS2.missing.value.padding, scoreMode = "reverse")
            candidatefrag.ratio[k,j] <- ms2Score(ms2Act, ms2DB, MS2DeltaMZ, sn.threshold = MS2.sn.threshold, noise.intensity = MS2.noise.intensity, missing.value.padding = MS2.missing.value.padding, scoreMode = "matched.fragments.ratio")
            candidateMSMS.DB[k,j] <- dbData$MSMS[match.posi[j]]
            candidateMSMS.Act[k,j] <- paste(ms2ActInRaw[[k]],collapse=";")
            candidate.mz.DB[k,j] <- dbData$`m/z`[match.posi[j]]
            candidate.tr.DB[k,j] <- dbData$tr[match.posi[j]]
            candidate.ID.DB[k,j] <- dbData$ID.DB[match.posi[j]]
            candidate.name[k,j] <- dbData$Name[match.posi[j]]
            candidate.SMILES[k,j] <- dbData$SMILES[match.posi[j]]
            candidate.Adduct[k,j] <- dbData$LCMS1_AdductName[match.posi[j]]
            candidate.CE.DB[k,j] <- dbData$CE[match.posi[j]]
          }
        }
      }

      order.num <- order(candidateScore,decreasing = T)
      DP[i] <- paste(candidateDP[order.num],collapse = " * ")
      RDP[i] <- paste(candidateRDP[order.num],collapse = " * ")
      frag.ratio[i] <- paste(candidatefrag.ratio[order.num],collapse = " * ")
      score[i] <- paste(candidateScore[order.num],collapse = " * ")
      MSMS.Exp[i] <- paste(candidateMSMS.Act[order.num],collapse = " * ")
      MSMS.DB[i] <- paste(candidateMSMS.DB[order.num],collapse = " * ")
      mz.DB[i] <- paste(candidate.mz.DB[order.num],collapse = " * ")
      tr.DB[i] <- paste(candidate.tr.DB[order.num],collapse = " * ")
      ID.DB[i] <- paste(candidate.ID.DB[order.num],collapse = " * ")
      name[i] <- paste(candidate.name[order.num],collapse = " * ")
      SMILES[i] <- paste(candidate.SMILES[order.num],collapse = " * ")
      Adduct[i] <- paste(candidate.Adduct[order.num],collapse = " * ")
      CE.DB[i] <- paste(candidate.CE.DB[order.num],collapse = " * ")
    }
  }
  close(pb)

  candidate2dataframe <- function(in.vector){
    candidateList <- strsplit(in.vector, " * ", fixed=TRUE)
    candidateDataframe <- data.frame(col1 = NA, col2 = NA, col3 = NA, col4 = NA, col5 = NA)
    # candidateDataframe <- candidateDataframe[-1, ]
    for (i in c(1:length(candidateList))) {
      candidateDataframe[i, "col1"] <- candidateList[[i]][1]
      candidateDataframe[i, "col2"] <- candidateList[[i]][2]
      candidateDataframe[i, "col3"] <- candidateList[[i]][3]
      candidateDataframe[i, "col4"] <- candidateList[[i]][4]
      candidateDataframe[i, "col5"] <- candidateList[[i]][5]
    }
    return(candidateDataframe)
  }


  annotationFromPeakTableRes1 <- cbind(MS1RawData,
                                       candidate2dataframe(MSMS.Exp)[,1],
                                       candidate2dataframe(MSMS.DB)[,1],
                                       candidate2dataframe(DP)[,1],
                                       candidate2dataframe(RDP)[,1],
                                       candidate2dataframe(frag.ratio)[,1],
                                       candidate2dataframe(score)[,1],
                                       candidate2dataframe(mz.DB)[,1],
                                       candidate2dataframe(tr.DB)[,1],
                                       candidate2dataframe(ID.DB)[,1],
                                       candidate2dataframe(name)[,1],
                                       candidate2dataframe(SMILES)[,1],
                                       candidate2dataframe(Adduct)[,1],
                                       candidate2dataframe(CE.DB)[,1])
  colnames(annotationFromPeakTableRes1) <- c(colnames(MS1RawData),"MSMS.Exp","MSMS.DB","DP","RDP","frag.ratio","score","mz.DB","tr.DB","ID.DB","name","SMILES","Adduct","CE.DB")
  annotationFromPeakTableRes2 <- cbind(MS1RawData,
                                       candidate2dataframe(MSMS.Exp)[,2],
                                       candidate2dataframe(MSMS.DB)[,2],
                                       candidate2dataframe(RDP)[,2],
                                       candidate2dataframe(DP)[,2],
                                       candidate2dataframe(frag.ratio)[,2],
                                       candidate2dataframe(score)[,2],
                                       candidate2dataframe(mz.DB)[,2],
                                       candidate2dataframe(tr.DB)[,2],
                                       candidate2dataframe(ID.DB)[,2],
                                       candidate2dataframe(name)[,2],
                                       candidate2dataframe(SMILES)[,2],
                                       candidate2dataframe(Adduct)[,2],
                                       candidate2dataframe(CE.DB)[,2])
  colnames(annotationFromPeakTableRes2) <- c(colnames(MS1RawData),"MSMS.Exp","MSMS.DB","DP","RDP","frag.ratio","score","mz.DB","tr.DB","ID.DB","name","SMILES","Adduct","CE.DB")
  annotationFromPeakTableRes3 <- cbind(MS1RawData,
                                       candidate2dataframe(MSMS.Exp)[,3],
                                       candidate2dataframe(MSMS.DB)[,3],
                                       candidate2dataframe(DP)[,3],
                                       candidate2dataframe(RDP)[,3],
                                       candidate2dataframe(frag.ratio)[,3],
                                       candidate2dataframe(score)[,3],
                                       candidate2dataframe(mz.DB)[,3],
                                       candidate2dataframe(tr.DB)[,3],
                                       candidate2dataframe(ID.DB)[,3],
                                       candidate2dataframe(name)[,3],
                                       candidate2dataframe(SMILES)[,3],
                                       candidate2dataframe(Adduct)[,3],
                                       candidate2dataframe(CE.DB)[,3])
  colnames(annotationFromPeakTableRes3) <- c(colnames(MS1RawData),"MSMS.Exp","MSMS.DB","DP","RDP","frag.ratio","score","mz.DB","tr.DB","ID.DB","name","SMILES","Adduct","CE.DB")
  annotationFromPeakTableRes4 <- cbind(MS1RawData,
                                       candidate2dataframe(MSMS.Exp)[,4],
                                       candidate2dataframe(MSMS.DB)[,4],
                                       candidate2dataframe(DP)[,4],
                                       candidate2dataframe(RDP)[,4],
                                       candidate2dataframe(frag.ratio)[,4],
                                       candidate2dataframe(score)[,4],
                                       candidate2dataframe(mz.DB)[,4],
                                       candidate2dataframe(tr.DB)[,4],
                                       candidate2dataframe(ID.DB)[,4],
                                       candidate2dataframe(name)[,4],
                                       candidate2dataframe(SMILES)[,4],
                                       candidate2dataframe(Adduct)[,4],
                                       candidate2dataframe(CE.DB)[,4])
  colnames(annotationFromPeakTableRes4) <- c(colnames(MS1RawData),"MSMS.Exp","MSMS.DB","DP","RDP","frag.ratio","score","mz.DB","tr.DB","ID.DB","name","SMILES","Adduct","CE.DB")
  annotationFromPeakTableRes5 <- cbind(MS1RawData,
                                       candidate2dataframe(MSMS.Exp)[,5],
                                       candidate2dataframe(MSMS.DB)[,5],
                                       candidate2dataframe(DP)[,5],
                                       candidate2dataframe(RDP)[,5],
                                       candidate2dataframe(frag.ratio)[,5],
                                       candidate2dataframe(score)[,5],
                                       candidate2dataframe(mz.DB)[,5],
                                       candidate2dataframe(tr.DB)[,5],
                                       candidate2dataframe(ID.DB)[,5],
                                       candidate2dataframe(name)[,5],
                                       candidate2dataframe(SMILES)[,5],
                                       candidate2dataframe(Adduct)[,5],
                                       candidate2dataframe(CE.DB)[,5])
  colnames(annotationFromPeakTableRes5) <- c(colnames(MS1RawData),"MSMS.Exp","MSMS.DB","DP","RDP","frag.ratio","score","mz.DB","tr.DB","ID.DB","name","SMILES","Adduct","CE.DB")
  annotationFromPeakTableRes.list <- list(candidate.1 = annotationFromPeakTableRes1,
                                          candidate.2 = annotationFromPeakTableRes2,
                                          candidate.3 = annotationFromPeakTableRes3,
                                          candidate.4 = annotationFromPeakTableRes4,
                                          candidate.5 = annotationFromPeakTableRes5)
  # write.xlsx(annotationFromPeakTableRes.list, file = result.file)
  return(annotationFromPeakTableRes.list)
}
zhengfj1994/MeTEA documentation built on June 29, 2021, 5:21 a.m.