R/subfunctionsIdentification.R

Defines functions crossTables organizeResults checkIntensityRules combineChains checkClass ddaFrags coelutingFrags findCandidates

Documented in checkClass checkIntensityRules coelutingFrags combineChains crossTables ddaFrags findCandidates organizeResults

# findCandidates
#' Search of lipid candidates of a certain class
#'
#' Search of lipid candidates from a peaklist based on a set of expected
#' adducts.
#'
#' @param MS1 peaklist of the MS function. Data frame with 3 columns: mz, RT (in seconds) and int (intensity).
#' @param db database (i.e. pcdb, dgdb, etc.). Data frame with at least 2 columns: Mass (exact mass) and total (total number of carbons and double bound of the FA chains, i.e. "34:1").
#' @param ppm m/z tolerance in ppm.
#' @param rt rt range where the function will look for candidates. By default,
#' it will search within all RT range in MS1.
#' @param adducts character vector containing the expected adducts to search
#' for (i.e. "M+H", "M+Na", "M-H", etc.). See details.
#' @param rttol rt tolerance in seconds to match adducts.
#' @param dbs list of data bases required for the annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be changed. If data bases have
#' been customized using \link{createLipidDB}, they also have to be modified
#' here.
#' @param rawData raw scans data. Output of \link{dataProcessing} function (MS1$rawData).
#' @param coelCutoff coelution score threshold between parent and fragment ions.
#' Only applied if rawData info is supplied.
#'
#' @return Data frame with the found candidates. It contains 6 columns: mz,
#' RT, int (from the peaklist data.frame), ppms, cb (total number of carbons and
#' double bounds of the FA chains) and adducts.
#'
#' @details \link{findCandidates} looks for matches between the m/z of the
#' MS1 peaklist and the expected m/z of the candidates in the database for each
#' adduct. If several adducts are expected, results are combined.
#'
#' Adducts allowed are contained in adductsTable data frame, which can be
#' modified if required (see \link{adductsTable}).
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
findCandidates <- function(MS1,
                           db,
                           ppm,
                           rt,
                           adducts,
                           rttol = 3,
                           dbs,
                           rawData = data.frame(),
                           coelCutoff = 0){
  adductsTable <- dbs$adductsTable
  count <- 0
  candidates <- vector()
  for (i in 1:length(adducts)){
    ad <- unique(adductsTable[adductsTable$adduct == adducts[i],])
    prec <- findPrecursor(MS1, db, ppm, massdif = ad$mdiff,
                          rt = rt, n = ad$n, charge = ad$charge)
    if (nrow(prec) > 0){
      prec <- cbind(prec, adducts = as.vector(adducts[i]))
      if (count == 0){
        candidates <- rbind(candidates, prec)
        count <- 1
      } else {
        candidates <- crossAdducts(df1 = candidates, df2 = prec, rttol = rttol,
                                   rawData = rawData, coelCutoff = coelCutoff)
      }
    }
  }
  if(length(candidates) > 0){
    candidates <- filtrateAdducts(df = candidates)
  }
  rownames(candidates) <- c()
  if (is.vector(candidates)){
    return(data.frame())
  } else {
    return(candidates)
  }
}

# coelutingFrags
#' Coeluting fragments extraction
#'
#' Given a RT and a list of peaks, this function subsets all coeluting fragments
#' within a rt windows. It is used by identification functions to extract
#' coeluting fragments from high energy functions for candidate precursor ions.
#'
#' @param precursors candidates data frame. Output of \link{findCandidates}.
#' @param products peaklist for MS2 function (MSMS).
#' @param rttol rt window in seconds.
#' @param rawData raw scans data. Output of \link{dataProcessing} function (MSMS$rawData).
#' @param coelCutoff coelution score threshold between parent and fragment ions.
#' Only applied if rawData info is supplied.
#'
#' @return List of data frames with the coeluting fragments for each candidate.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
coelutingFrags <- function(precursors,
                           products,
                           rttol,
                           rawData = data.frame(),
                           coelCutoff = 0){
  psubset <- apply(precursors, 1, function(x){
    peaks <- which(products[,"RT"] <= as.numeric(x["RT"])+(rttol/2) & products[,"RT"] >=
                     as.numeric(x["RT"])-(rttol/2))
    if(length(peaks) > 0){
      df <- products[peaks,]
      scores <- coelutionScore(as.character(x["peakID"]), df$peakID, rawData)
      df$coelScore <- scores
      df <- df[scores >= coelCutoff,]
      return(df)
    } else {
      return(data.frame())
    }
  })
  coelfrags <- lapply(psubset, function(x) {
    if (nrow(x) > 0){
      x <- x[!is.na(x$mz),]
    } else {data.frame()}
  })
  return(coelfrags)
}

# ddaFrags
#' MS/MS scan extraction of a precursor in DDA
#'
#' This function searches for the closest precursor selected for MS2 in DDA
#' that matches mz tolerance and RT window of a list of candidates and extracts
#' their fragments.
#'
#' @param candidates candidates data frame. Output of \link{findCandidates}.
#' @param precursors data frame with the whole list of precursors selected for MS2.
#' @param rawData raw scans data.
#' @param rawData peaklist for MS2 function (MSMS).
#' @param ppm m/z tolerance in ppm.
#'
#' @details MS2 scans for a given precursor are searched within a rt window from
#' minrt-rttol/2 to maxrt+rttol/2. If the same precursor was selected several
#' times along the peak, the closest scan to the rt at the peak maximum is
#' selected for annotation.
#' 
#' Coelution score for DDA fragments represents their relative intensity within 
#' the MS2 scan.
#'
#' @return List of data frames with the fragments for each candidate.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
ddaFrags <- function(candidates,
                     precursors,
                     rawData,
                     ppm){
  coelfrags <- apply(candidates, 1, function(x){
    scans <- findMS2precursor(mz = as.numeric(x["mz"]),
                              minrt = as.numeric(x["minRT"]),
                              maxrt = as.numeric(x["maxRT"]),
                              precursors = precursors,
                              ppm = ppm)
    if (length(scans) > 0){
      rts <- precursors$RT[match(scans, precursors$Scan)]
      scanrt <- scans[which.min(abs(rts-as.numeric(x["RT"])))]
      f <- rawData[rawData$Scan == scanrt, c("mz", "RT", "int", "peakID")]
      f <- f[grepl("MS2", f$peakID),]
      if(nrow(f) > 0){
        f$coelScore <- 0 # previously f$int/sum(f$int), now this value goes to scoreInt
        return(f)
      } else {
        return(data.frame())
      }
    } else {
      return(data.frame())
    }
  })
}

# checkClass
#' Search of class fragments to confirm the lipid class.
#'
#' Search of characteristic fragments that confirm a given lipid class.
#'
#' @param candidates output of \link{findCandidates} function.
#' @param coelfrags list of peaks coeluting with each candidate. Output of
#' \link{coelutingFrags}.
#' @param clfrags vector containing the expected fragments for a given lipid
#' class. See details.
#' @param ftype character vector indicating the type of fragments in clfrags.
#' It can be: "F" (fragment), "NL" (neutral loss) or "BB" (building block).
#' See details.
#' @param clrequisites logical vector indicating if each class fragment is
#' required or not. If none of the fragment is required, at least one of them
#' must be present within the coeluting fragments. If the presence of any
#' fragment excludes the class, it can be specified by using "excluding".
#' @param ppm m/z tolerance in ppm.
#' @param dbs list of data bases required for the annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be changed. If data bases have
#' been customized using \link{createLipidDB}, they also have to be modified
#' here. It is employed when some fragment belongs to "BB" \code{ftype}.
#'
#'
#' @details \code{clfrags}, \code{ftype} and \code{clrequisites} will indicate
#' the rules to confirm a lipid class. All three arguments must have the same
#' length.
#'
#' This function allows three different types of fragments: fragments with a
#' specific m/z as for example 227.0326 for PG in negative mode, which needs to
#' be defined as clfrags = c(227.0326) and ftype = c("F"); neutral losses such
#' as the head group of some PL (i.e. NL of 74.0359 in PG in negative mode),
#' which will be defined as clfrags = c(74.0359) and ftype = c("NL"); or
#' building blocks resulting from the loss of some groups, as for example, PA as
#' M-H resulting from the loss of the head group (glycerol) in PG in ESI-, which
#' will be defined as clfrags = c("pa_M-H") and ftype = c("BB"). The last
#' two options could define the same fragments. In this case just one of them
#' would be necessary.
#'
#' When using the third type of fragment ("BB"), the building block will be
#' specified in lower case (i.e. pa, dg, lysopa, mg, etc.) and the adduct will be
#' given as it appears in the adductsTable, both separated by "_". Names for the
#' building blocks are the ones used for the LipidMS databases without the "db"
#' at the end.
#'
#' In case the presence of a fragment indicates that the candidate does not
#' belong to the lipid class (i.e. loss of CH3 in PE, which corresponds to a PC
#' actually), this will be specified by using \code{clrequisites} = c("excluding").
#'
#' @return List with 2 elements: a matrix with logical values (presence/absense)
#' of each expected fragment (columns) for each candidate (rows), and a logical
#' vector with the confirmation of the lipid class for each candidate.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
checkClass <- function(candidates,
                       coelfrags,
                       clfrags,
                       ftype,
                       clrequisites,
                       ppm = 10,
                       dbs){

  adductsTable <- dbs$adductsTable
  allfragments <- rep(list(data.frame()), nrow(candidates))
  if (length(clfrags) == 0){
    return(list(presence = matrix(), passed = rep("No class fragments",
                                                  nrow(candidates))))
  } else {
    verified <- matrix(NA, ncol=length(clfrags), nrow = nrow(candidates))
    for (i in 1:length(clfrags)){
      if (ftype[i] == "F"){
        classf <- lapply(coelfrags, function(x){
          filtermsms(x, as.numeric(clfrags[i]), ppm)})
        verified[,i] <- unlist(lapply(classf, function(x) {if(is.data.frame(x)){nrow(x) > 0}else {FALSE}}))
        allfragments <- Map(rbind, classf, allfragments)
      } else if (ftype[i] == "NL") {
        classf <- list()
        for (c in 1:nrow(candidates)){
          f <- filtermsms(coelfrags[[c]], candidates$mz[c]-as.numeric(clfrags[i]), ppm)
          classf[[c]] <- f
        }
        verified[,i] <- unlist(lapply(classf, function(x) {if(is.data.frame(x)){nrow(x) > 0}else {FALSE}}))
        allfragments <- Map(rbind, classf, allfragments)
      } else if (ftype[i] == "BB"){
        db <- dbs[[paste(unlist(strsplit(clfrags[i], "_"))[1], "db", sep="")]]
        mdiff <- adductsTable[adductsTable$adduct ==
                                unlist(strsplit(clfrags[i], "_"))[2], "mdiff"]
        classf <- list()
        for (c in 1:nrow(candidates)){
          f <- filtermsms(coelfrags[[c]],
                          db$Mass[which(db$total == candidates$cb[c])]+ mdiff,
                          ppm)
          classf[[c]] <- f
        }
        verified[,i] <- unlist(lapply(classf, function(x) {if(is.data.frame(x)){nrow(x) > 0}else {FALSE}}))
        allfragments <- Map(rbind, classf, allfragments)
      }
    }
    if (any(clrequisites == T | clrequisites == "TRUE")){ # when there are required fragments, we check them
      passed <- apply(verified, 1,
                      function(x) sum(x == T & (clrequisites == T |
                                                  clrequisites == "TRUE"))) ==
        sum(clrequisites == T | clrequisites == "TRUE")
    } else { # if there isnt any required fragment, any of them will be enough
      passed <- apply(verified, 1, sum) > 0
    }
    if (any(clrequisites == "excluding")){
      excl <- which(clrequisites == "excluding")
      for (e in 1:length(excl)){
        presence = verified[,excl[e]]
        passed[presence == T] = FALSE
      }
    }
    return(list(presence = verified, passed = passed, fragments = allfragments))
  }
}

# chainFrags
#' Search of chain specific fragments
#'
#' Search of specific fragments that inform about the chains structure.
#'
#' @param coelfrags coeluting fragments for each candidate. Output of
#' \link{coelutingFrags}.
#' @param chainfrags character vector containing the fragmentation rules for
#' the chain fragments. If it is an empty vector, chains will be calculated based
#' on the difference between the precursor and the other chain. See details.
#' @param ppm m/z tolerance in ppm.
#' @param candidates candidates data frame. If any chain needs to be calculated
#' based on the difference between the precursor and the other chain, this
#' argument will be required. Output of \link{chainFrags}.
#' @param f known chains. If any chain needs to be calculated
#' based on the difference between the precursor and the other chain, this
#' argument will be required. Output of \link{chainFrags}.
#' @param dbs list of data bases required for the annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be changed. If data bases have
#' been customized using \link{createLipidDB}, they also have to be modified
#' here.
#'
#' @details The chainfrags argument must contain the fragmentation rules
#' which inform about the chains structure. For example, in the case of PG
#' subclass, the chain in sn1 position is identified by the lysoPG as M-H
#' resulting from the loss of the FA chain of sn2; and the chain in sn2 position
#' is identified as the free FA chain as M-H. These two fragments need to be
#' searched in two different steps: in the fist step we will look for lysoPGs
#' coeluting with the precursor using chainfrags = c("lysopg_M-H");
#' then, we will look for FA chains using chainfrags = c("fa_M-H"). This
#' information can be combined later using \link{combineChains} function.
#'
#' To indicate the fragments to be searched, the class of lipid is writen
#' using the same names as the LipidMS databases without the "db" at the end
#' (i.e. pa, dg, lysopa, mg, CE, etc.), and the adduct has to be indicated as
#' it appears in the adductsTable, both parts separated by "_". In case some
#' chain needs to be searched based on a neutral loss, this can be defined using
#' "NL-" prefix, followed by the database and adduct. If this neutral loss
#' is employed to find the remaining chain, "cbdiff-" prefix allows to calculate
#' the difference in carbons and doubles bounds between the precursor and the
#' building block found. For example, "cbdiff-dg_M+H-H2O" will look for DG as
#' M+H-H2O and then, it will return the difference between their number of
#' carbons and double bounds and the ones from the precursor. Otherwise,
#' "NL-mg_M+H-H2O" will look for fragments coming from the loss of MGs.
#'
#' In case these fragments identified as losses from the precursors are going to
#' be employed for the intensity rules, this same prefix has to be added.
#'
#' If a chain is calculated based on the difference of total number of
#' carbons and double bounds between the precursor and a previously searched chain,
#' \code{chainfrags} argument must be a character vector c("") and candidates
#' data frame and chain fragments list must be provided.
#'
#' @return List of data frames with the chain fragments found.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
chainFrags <- function (coelfrags,
                       chainfrags,
                       ppm = 10,
                       candidates,
                       f = NULL,
                       dbs){
  adductsTable <- dbs$adductsTable
  matches <- list()
  for (c in 1:nrow(candidates)) {
    fragments <- vector()
    for (i in 1:length(chainfrags)) {
      if (chainfrags[i] == ""){
        if (nrow(f[[c]]) > 0) {
          diffs <- diffcb(candidates$cb[c], f[[c]]$cb,
                          dbs$fadb)
          fmatch <- unique(data.frame(cb = diffs, mz = 0, RT = f[[c]]$RT,
                                      int = 0, peakID = "",
                                      coelScore = NA,
                                      db = "cbdiff", adduct = "",
                                      stringsAsFactors = F))
          fmatch <- fmatch[fmatch$cb != "", ]
          fragments <- rbind(fragments, fmatch)
        }
      }
      if (grepl("cbdiff-", chainfrags[i])) {
        rule <- unlist(strsplit(chainfrags[i], "cbdiff-"))[2]
        db <- dbs[[paste(unlist(strsplit(rule, "_"))[1],
                         "db", sep = "")]]
        ad <- adductsTable[adductsTable$adduct ==
                             unlist(strsplit(rule, "_"))[2], ]
        mdiff <- ad$mdiff
        charge <- ad$charge
        n <- ad$n
        fmatch <- frags(coelfrags[[c]], ppm=ppm, db, mdiff, charge, n)
        if (nrow(fmatch) > 0) {
          fragments <-
            rbind(fragments,
                  data.frame(fmatch, db = paste(unlist(strsplit(chainfrags[i],
                                                                "_"))[1], sep = ""),
                             adduct = unlist(strsplit(chainfrags[i], "_"))[2]))
        }
      } else if (grepl("NL-", chainfrags[i])) {
        rule <- unlist(strsplit(chainfrags[i], "NL-"))[2]
        db <- dbs[[paste(unlist(strsplit(rule, "_"))[1],
                         "db", sep = "")]]
        db$Mass <- candidates$mz[c] - db$Mass
        ad <- adductsTable[adductsTable$adduct ==
                             unlist(strsplit(rule, "_"))[2],]
        mdiff <- ad$mdiff
        charge <- ad$charge
        n <- ad$n
        fmatch <- frags(coelfrags[[c]], ppm, db, mdiff, charge, n)
        if (nrow(fmatch) > 0) {
          fragments <-
            rbind(fragments,
                  data.frame(fmatch,
                             db = paste(unlist(strsplit(chainfrags[i],
                                                        "_"))[1], sep = ""),
                             adduct = unlist(strsplit(chainfrags[i], "_"))[2]))
        }
      } else {
        rule <- chainfrags[i]
        db <- dbs[[paste(unlist(strsplit(rule, "_"))[1],
                         "db", sep = "")]]
        if (grepl("Mn", unlist(strsplit(chainfrags[i],
                                        "_"))[2])) {
          mdiff <- as.numeric(unlist(strsplit(unlist(strsplit(chainfrags[i],
                                                              "_"))[2], "Mn")))[2]
          charge <- 1
          n <- 1
        } else {
          ad <- adductsTable[adductsTable$adduct ==
                               unlist(strsplit(chainfrags[i], "_"))[2],]
          mdiff <- ad$mdiff
          charge <- ad$charge
          n <- ad$n
        }
        fmatch <- frags(coelfrags[[c]], ppm, db, mdiff, charge, n)
        if (nrow(fmatch) > 0) {
          fragments <- rbind(fragments,
                             data.frame(fmatch,
                                        db = unlist(strsplit(chainfrags[i], "_"))[1],
                                        adduct = unlist(strsplit(chainfrags[i],
                                                                 "_"))[2]))
        }
      }
    }
    if (is.data.frame(fragments)) {
      matches[[c]] = unique(fragments)
    } else {
      matches[[c]] = data.frame()
    }
  }
  if (any(grepl("cbdiff", chainfrags))) {
    new <- list()
    for (c in 1:nrow(candidates)) {
      if (nrow(matches[[c]]) > 0) {
        fragments <- matches[[c]][grepl("cbdiff", matches[[c]]$db),]
        other <- matches[[c]][!grepl("cbdiff", matches[[c]]$db),]
        if (nrow(fragments) > 0){
          db <- diffcb(candidates$cb[c], fragments$cb, dbs$fadb)
          fragments$cb <- db
          new[[c]] <- rbind(other, fragments[fragments$cb != "", ])
        } else {
          new[[c]] <- matches[[c]]
        }
      } else {
        new[[c]] <- data.frame()
      }
    }
    matches <- new
  }
  return(matches)
}

# combineChains
#' Combine chain fragments that could belong to the same precursor.
#'
#' It calculates combinations of chain fragments that sum up the same number of
#' carbons and double bounds as the precursor.
#'
#' @param candidates candidates data frame. Output of \link{findCandidates}.
#' @param nchains number of chains of the targeted lipid class.
#' @param sn1 list of chain fragments identified for sn1 position. Output of
#' \link{chainFrags}.
#' @param sn2 list of chain fragments identified for sn2 position. Output of
#' \link{chainFrags}. If required.
#' @param sn3 list of chain fragments identified for sn3 position. Output of
#' \link{chainFrags}. If required.
#' @param sn4 list of chain fragments identified for sn4 position. Output of
#' \link{chainFrags}. If required.
#'
#' @return List of data frames with candidate chains structures.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
combineChains <- function(candidates,
                          nchains,
                          sn1,
                          sn2,
                          sn3,
                          sn4){
  if (nchains == 1){
    selected <- list()
    fragments <- rep(list(list()), nrow(candidates))
    for (c in 1:nrow(candidates)){
      fragments[[c]][[1]] <- sn1[[c]][which(sn1[[c]]$cb == candidates$cb[c]),]
      if (nrow(fragments[[c]][[1]]) > 0){
        selected[[c]] <- data.frame(sn1 = candidates$cb[[c]], stringsAsFactors = F)
      } else {
        selected[[c]] <- data.frame()
      }
    }
  }
  if (nchains == 2){
    chains1 <- list()
    chains2 <- list()
    for (c in 1:nrow(candidates)){
      if (nrow(sn1[[c]]) > 0){
        chains1[[c]] <- sn1[[c]]
      } else {
        chains1[[c]] <- data.frame()
      }
      if (nrow(sn2[[c]]) > 0){
        chains2[[c]] <- sn2[[c]]
      } else {
        chains2[[c]] <- data.frame()
      }
    }
    selected <- list()
    fragments <- rep(list(list()), nrow(candidates))
    for (c in 1:nrow(candidates)){
      if(length(chains1[[c]]) > 0 & length(chains2[[c]]) > 0){
        x <- expand.grid(unique(chains1[[c]]$cb), unique(chains2[[c]]$cb),
                         stringsAsFactors = F)
        comb <- list()
        for (i in 1:nrow(x)) {
          comb[[i]] <- paste(x[i, 1], x[i, 2], sep=" ")
        }
        sel <- unlist(lapply(comb, select, candidates$cb[c], n=2))
        selected[[c]] <- data.frame(sn1 = x[sel,1], sn2 = x[sel,2],
                                    stringsAsFactors = F)
        if (nrow(selected[[c]]) > 0){
          for (u in 1:nrow(selected[[c]])){
            fragments[[c]][[u]] <- unique(rbind(chains1[[c]][chains1[[c]]$cb %in%
                                                               selected[[c]]$sn1[u],],
                                                chains2[[c]][chains2[[c]]$cb %in%
                                                               selected[[c]]$sn2[u],]))
          }
        }
      } else {
        selected[[c]] <- data.frame()
        fragments[[c]] <- list()
      }
    }
  }
  if (nchains == 3){
    chains1 <- list()
    chains2 <- list()
    chains3 <- list()
    for (c in 1:nrow(candidates)){
      if (nrow(sn1[[c]]) > 0){
        chains1[[c]] <- sn1[[c]]
      } else {
        chains1[[c]] <- data.frame()
      }
      if (nrow(sn2[[c]]) > 0){
        chains2[[c]] <- sn2[[c]]
      } else {
        chains2[[c]] <- data.frame()
      }
      if (nrow(sn3[[c]]) > 0){
        chains3[[c]] <- sn3[[c]]
      } else {
        chains3[[c]] <- data.frame()
      }
    }
    selected <- list()
    fragments <- rep(list(list()), nrow(candidates))
    for (c in 1:nrow(candidates)){
      if(length(chains1[[c]]) > 0 & length(chains2[[c]]) > 0 &
         length(chains3[[c]]) > 0){
        x <- expand.grid(unique(chains1[[c]]$cb), unique(chains2[[c]]$cb),
                         unique(chains3[[c]]$cb), stringsAsFactors = F)
        comb <- list()
        for (i in 1:nrow(x)) {
          comb[[i]] <- paste(x[i, 1], x[i, 2], x[i, 3], sep=" ")
        }
        sel <- unlist(lapply(comb, select, candidates$cb[c], n=3))
        selected[[c]] <- data.frame(sn1 = x[sel,1], sn2 = x[sel,2],
                                    sn3 = x[sel,3], stringsAsFactors = F)
        if (nrow(selected[[c]]) > 0){
          for (u in 1:nrow(selected[[c]])){
            fragments[[c]][[u]] <- unique(rbind(chains1[[c]][chains1[[c]]$cb %in%
                                                               selected[[c]]$sn1[u],],
                                                chains2[[c]][chains2[[c]]$cb %in%
                                                               selected[[c]]$sn2[u],],
                                                chains3[[c]][chains3[[c]]$cb %in%
                                                               selected[[c]]$sn3[u],]))
          }
        }
      } else {
        selected[[c]] <- data.frame()
        fragments[[c]] <- list()
      }
    }
  }
  if (nchains == 4){
    chains1 <- list()
    chains2 <- list()
    chains3 <- list()
    chains4 <- list()
    for (c in 1:nrow(candidates)){
      if (nrow(sn1[[c]]) > 0){
        chains1[[c]] <- sn1[[c]]
      } else {
        chains1[[c]] <- vector()
      }
      if (nrow(sn2[[c]]) > 0){
        chains2[[c]] <- sn2[[c]]
      } else {
        chains2[[c]] <- vector()
      }
      if (nrow(sn3[[c]]) > 0){
        chains3[[c]] <- sn3[[c]]
      } else {
        chains3[[c]] <- vector()
      }
      if (nrow(sn4[[c]]) > 0){
        chains4[[c]] <- sn4[[c]]
      } else {
        chains4[[c]] <- vector()
      }
    }
    selected <- list()
    fragments <- rep(list(list()), nrow(candidates))
    for (c in 1:nrow(candidates)){
      if(length(chains1[[c]]) > 0 & length(chains2[[c]]) > 0 &
         length(chains3[[c]]) > 0 & length(chains4[[c]]) > 0){
        x <- expand.grid(unique(chains1[[c]]$cb), unique(chains2[[c]]$cb),
                         unique(chains3[[c]]$cb), unique(chains4[[c]]$cb),
                         stringsAsFactors = F)
        comb <- list()
        for (i in 1:nrow(x)) {
          comb[[i]] <- paste(x[i, 1], x[i, 2], x[i, 3], x[i, 4], sep=" ")
        }
        sel <- unlist(lapply(comb, select, candidates$cb[c], n=4))
        selected[[c]] <- data.frame(sn1 = x[sel,1], sn2 = x[sel,2],
                                    sn3 = x[sel,3], sn4 = x[sel,4],
                                    stringsAsFactors = F)
        if (nrow(selected[[c]]) > 0){
          for (u in 1:nrow(selected[[c]])){
            fragments[[c]][[u]] <- unique(rbind(chains1[[c]][chains1[[c]]$cb %in%
                                                               selected[[c]]$sn1[u],],
                                                chains2[[c]][chains2[[c]]$cb %in%
                                                               selected[[c]]$sn2[u],],
                                                chains3[[c]][chains3[[c]]$cb %in%
                                                               selected[[c]]$sn3[u],],
                                                chains4[[c]][chains4[[c]]$cb %in%
                                                               selected[[c]]$sn4[u],]))
          }
        }
      } else {
        selected[[c]] <- data.frame()
        fragments[[c]] <- list()
      }
    }
  }
  return(list(selected = selected, fragments = fragments))
}

# checkIntensityRules
#' Check intensity rules
#'
#' Check intensity rules to confirm chains position.
#'
#' @param intrules character vector specifying the fragments to compare. See
#' details.
#' @param rates character vector with the expected rates between fragments given
#' as a string (i.e. "3/1"). See details.
#' @param intrequired logical vector indicating if any of the rules is required.
#' If not, at least one must be verified to confirm the structure.
#' @param nchains number of chains of the targeted lipid class.
#' @param combinations output of \link{combineChains}.
#'
#' @details This function will be employed when the targeted lipid class has
#' more than one chain.
#'
#' Taking PG subclass as an example, intensities of lysoPG fragments
#' (informative for sn1) can be employed to confirm the chains structure
#' (intrules = c("lysopg_sn1/lysopg_sn1")). In this case, the intensity of the
#' lysoPG resulting from the loss of the FA chain in sn2 is at least 3 times
#' greater (rates = c("3/1")) than the lysoPG resulting from the loss of the FA
#' chain in sn1.
#'
#' For the intrules argument, "/" will be use to separate the fragments related
#' to each chain (sn1/sn2/etc), and "_" will be use to indicate the list in
#' which they'll be searched. This will depend on the chain fragments
#' rules defined previously. Following the example, as we use lysoPG to define
#' the sn1 position, both fragments will be searched in this list (sn1).
#'
#' For classes with more than one FA chain, if some intensity rule should be
#' employed to identify their position but they are no defined yet, use "Unknown".
#' If it is not necessary because the fragmentation rules are informative enough
#' to define the position (i.e. sphingolipid species), just leave an empty vector.
#'
#' @return List of logical vectors  with the confirmation for each combination.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
checkIntensityRules <- function(intrules,
                                rates,
                                intrequired,
                                nchains,
                                combinations){
  intCheck <- list()
  for (c in 1:length(combinations$selected)){
    intCheck[[c]] <- checkIntRules(intrules, rates, intrequired,
                                   nchains = nchains,
                                   combinations = combinations$selected[[c]],
                                   sn = combinations$fragments[[c]])
  }
  return(intCheck)
}

# organizeResults
#' Prepare output for LipidMS annotation functions
#'
#' Prepare a readable output for LipidMS identification functions.
#'
#' @param candidates candidates data frame. Output of \link{findCandidates}.
#' @param coelfrags list of coeluting fragments for each candidate
#' @param clfrags vector containing the expected fragments for a given lipid
#' class.
#' @param classConf output of \link{checkClass}
#' @param chainsComb output of \link{combineChains}
#' @param intrules character vector specifying the fragments to compare. See
#' \link{checkIntensityRules}.
#' @param intConf output of \link{checkIntensityRules}
#' @param nchains number of chains of the targeted lipid class.
#' @param class character value. Lipid class (i.e. PC, PE, DG, TG, etc.).
#' @param acquisitionmode acquisition mode (DIA or DDA).
#' 
#' @details Coelution score for DIA data is calculated as the mean coelution 
#' score of all fragments used for annotation, while for DDA data, the intensity
#' score is given, which is calculated as the sum of the relative intensities of 
#' the fragments used for annotation.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
organizeResults <- function(candidates,
                            coelfrags,
                            clfrags,
                            classConf,
                            chainsComb,
                            intrules,
                            intConf,
                            nchains,
                            class, 
                            acquisitionmode){
  results <- vector()
  if (nrow(candidates) > 0){
    if (nchains == 0){
      for (c in 1:nrow(candidates)){
        if (length(clfrags) > 0){
          idlevel <- "Subclass"
          if (classConf$passed[c] == T){
            score <- mean(classConf$fragments[[c]]$coelScore, na.rm = T)
            scoreInt <- sum(classConf$fragments[[c]]$int, na.rm = T)
          }
        } else {
          idlevel <- "MS-only"
          score <- 0
          scoreInt <- 0
        }
        if (classConf$passed[c] == T ||
            classConf$passed[c] == "No class fragments"){
          results <- rbind(results,
                           data.frame(ID = paste(class, "(", candidates$cb[c], 
                                                 ")", sep = ""),
                                      Class = class,
                                      CDB = candidates$cb[c],
                                      FAcomp = candidates$cb[c],
                                      mz = candidates$mz[c],
                                      RT = candidates$RT[c],
                                      int = candidates$int[c],
                                      Adducts = candidates$adducts[c],
                                      ppm = round(candidates$ppms[c], 3),
                                      confidenceLevel = idlevel,
                                      peakID = candidates$peakID[c],
                                      Score = round(score, 3),
                                      ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                      stringsAsFactors = F))
        }
      }
    } else if (nchains == 1){
      for (c in 1:nrow(candidates)){
        if (classConf$passed[c] == T || length(clfrags) == 0){
          Class <- class
          CDB <- candidates$cb[c]
          mz <- candidates$mz[c]
          RT <- candidates$RT[c]
          int <- candidates$int[c]
          Adducts <- candidates$adducts[c]
          ppm <- round(candidates$ppm[c], 3)
          ID <- paste(class, "(", CDB, ")", sep = "")
          FAcomp <- CDB
          if (nrow(chainsComb$selected[[c]]) > 0){
            confidenceLevel <- "FA"
            if (length(clfrags) > 0){
              if (acquisitionmode == "DIA"){
                score <-  mean(c(classConf$fragments[[c]]$coelScore,
                                 chainsComb$fragments[[c]]$coelScore), na.rm = T)
              } else {
                score <- sum(c(classConf$fragments[[c]]$coelScore,
                                chainsComb$fragments[[c]]$coelScore), na.rm = T)
              }
              scoreInt <- sum(c(classConf$fragments[[c]]$int,
                                chainsComb$fragments[[c]]$int), na.rm = T)
            } else {
              if (acquisitionmode == "DIA"){
                score <-  mean(chainsComb$fragments[[c]]$coelScore, na.rm = T)
              } else {
                score <- sum(chainsComb$fragments[[c]]$coelScore, na.rm = T)
              }
              scoreInt <-  sum(chainsComb$fragments[[c]]$int, na.rm = T)
            }
            results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz, RT,
                                                 int, Adducts, ppm, confidenceLevel,
                                                 peakID = candidates$peakID[c],
                                                 Score = round(score, 3),
                                                 ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                 stringsAsFactors = F))
          } else if (length(clfrags) > 0){
            confidenceLevel <- "Subclass"
            if (acquisitionmode == "DIA"){
              score <- mean(classConf$fragments[[c]]$coelScore, na.rm = T)
            } else {
              score <- sum(classConf$fragments[[c]]$coelScore, na.rm = T)
            }
            scoreInt <- sum(classConf$fragments[[c]]$int, na.rm = T)
            results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz, RT,
                                                 int, Adducts, ppm, confidenceLevel,
                                                 peakID = candidates$peakID[c],
                                                 Score = round(score, 3),
                                                 ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                 stringsAsFactors = F))
          }
        }
      }
    } else if (nchains == 2){
      for (c in 1:nrow(candidates)){
        if (classConf$passed[c] == T || length(clfrags) == 0){
          Class <- class
          CDB <- candidates$cb[c]
          mz <- candidates$mz[c]
          RT <- candidates$RT[c]
          int <- candidates$int[c]
          Adducts <- candidates$adducts[c]
          ppm <- round(candidates$ppm[c], 3)
          if (nrow(chainsComb$selected[[c]]) > 0){
            for (comb in 1:nrow(chainsComb$selected[[c]])){
              if (intConf[[c]][comb] == T || length(intrules) == 0){
                ID <- paste(class, "(", paste(chainsComb$selected[[c]][comb,],
                                              collapse="/"),
                            ")", sep="")
                confidenceLevel <- "FA position"
              } else if (intConf[[c]][comb] == F || "Unknown" %in% intrules){
                ID <- paste(class, "(", paste(sort(unlist(chainsComb$selected[[c]][comb,])),
                                              collapse="_"),
                            ")", sep="")
                confidenceLevel <- "FA"
              }
              if (length(clfrags) > 0){
                if (acquisitionmode == "DIA"){
                  score <-  mean(c(classConf$fragments[[c]]$coelScore,
                                   chainsComb$fragments[[c]][[comb]]$coelScore),
                                 na.rm = T)
                } else {
                  score <-  sum(c(classConf$fragments[[c]]$coelScore,
                                   chainsComb$fragments[[c]][[comb]]$coelScore),
                                 na.rm = T)
                }
                scoreInt <- sum(c(classConf$fragments[[c]]$int,
                                  chainsComb$fragments[[c]][[comb]]$int),
                                na.rm = T)
              } else {
                if (acquisitionmode == "DIA"){
                  score <-  mean(chainsComb$fragments[[c]][[comb]]$coelScore, na.rm = T)
                } else {
                  score <-  sum(chainsComb$fragments[[c]][[comb]]$coelScore, na.rm = T)
                }
                scoreInt <- sum(chainsComb$fragments[[c]][[comb]]$int, na.rm = T)
              }
              FAcomp <- paste(chainsComb$selected[[c]][comb,], collapse = " ")
              results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz,
                                                   RT, int, Adducts, ppm,
                                                   confidenceLevel,
                                                   peakID = candidates$peakID[c],
                                                   Score = round(score, 3),
                                                   ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                   stringsAsFactors = F))
            }
          } else {
            if (length(clfrags) > 0){
              ID <- paste(class, "(", CDB, ")", sep="")
              confidenceLevel <- "Subclass"
              FAcomp = ""
              if (acquisitionmode == "DIA"){
                score <- mean(classConf$fragments[[c]]$coelScore, na.rm = T)
              } else {
                score <- sum(classConf$fragments[[c]]$coelScore, na.rm = T)
              }
              scoreInt <- sum(classConf$fragments[[c]]$int, na.rm = T)
              results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz,
                                                   RT, int, Adducts, ppm,
                                                   confidenceLevel,
                                                   peakID = candidates$peakID[c],
                                                   Score = round(score, 3),
                                                   ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                   stringsAsFactors = F))
            }
          }
        }
      }
      if (!is.vector(results)){
        keep <- rep(NA, nrow(results))
        for (r in 1:nrow(results)){
          if (is.na(keep[r])){
            dup <- which(results$mz == results$mz[r] & results$RT ==
                           results$RT[r])
            dup <- dup[dup >= r]
            if (length(dup) > 1){
              rep <- duplicated(lapply(sapply(results$FAcomp[dup],
                                              strsplit, " "), sort))
              rep <- dup[which(rep)]
              all <- c(r, rep)
              tokeep <- all[which.max(LipidMS::confLevels[
                results$confidenceLevel[all], "order"])]
              if (results$confidenceLevel[tokeep] == "FA position"){
                tokeep <- all[which(results$confidenceLevel[all] ==
                                      results$confidenceLevel[tokeep])]
              }
              keep[tokeep] <- TRUE
              keep[setdiff(all, tokeep)] <- FALSE
            } else {
              keep[r] <- TRUE
            }
          }
        }
        results <- results[keep,]
        results <- unique(results)
      }
    } else if (nchains %in% c(3,4)){
      for (c in 1:nrow(candidates)){
        if (classConf$passed[c] == T || length(clfrags) == 0){
          Class <- class
          CDB <- candidates$cb[c]
          mz <- candidates$mz[c]
          RT <- candidates$RT[c]
          int <- candidates$int[c]
          Adducts <- candidates$adducts[c]
          ppm <- round(candidates$ppm[c], 3)
          if (nrow(chainsComb$selected[[c]]) > 0){
            for (comb in 1:nrow(chainsComb$selected[[c]])){
              if (intConf[[c]][comb] == T || length(intrules) == 0){
                ID <- paste(class, "(", paste(chainsComb$selected[[c]][comb,],
                                              collapse="/"), ")", sep="")
                confidenceLevel <- "FA position"
              } else if (intConf[[c]][comb] == F || intrules == "Unknown"){
                ID <- paste(class, "(", paste(sort(unlist(chainsComb$selected[[c]][comb,])),
                                              collapse="_"), ")", sep="")
                confidenceLevel <- "FA"
              }
              if (length(clfrags) > 0){
                if (acquisitionmode == "DIA"){
                  score <-  mean(c(classConf$fragments[[c]]$coelScore,
                                   chainsComb$fragments[[c]][[comb]]$coelScore),
                                 na.rm = T)
                } else {
                  score <-  sum(c(classConf$fragments[[c]]$coelScore,
                                   chainsComb$fragments[[c]][[comb]]$coelScore),
                                 na.rm = T)
                }
                scoreInt <-  sum(c(classConf$fragments[[c]]$int,
                                   chainsComb$fragments[[c]][[comb]]$int),
                                 na.rm = T)
              } else {
                if (acquisitionmode == "DIA"){
                  score <-  mean(chainsComb$fragments[[c]][[comb]]$coelScore,
                                 na.rm = T)
                } else {
                  score <-  sum(chainsComb$fragments[[c]][[comb]]$coelScore,
                                 na.rm = T)
                }
                scoreInt <-  sum(chainsComb$fragments[[c]][[comb]]$int,
                                 na.rm = T)
              }
              FAcomp <- paste(chainsComb$selected[[c]][comb,], collapse = " ")
              results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz, RT,
                                                   int, Adducts, ppm,
                                                   confidenceLevel,
                                                   peakID = candidates$peakID[c],
                                                   Score = round(score, 3),
                                                   ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                   stringsAsFactors = F))
            }
          } else {
            if (length(clfrags) > 0){
              ID <- paste(class, "(", CDB, ")", sep="")
              confidenceLevel <- "Subclass"
              FAcomp = ""
              if (acquisitionmode == "DIA"){
                score <- mean(classConf$fragments[[c]]$coelScore, na.rm = T)
              } else {
                score <- sum(classConf$fragments[[c]]$coelScore, na.rm = T)
              }
              scoreInt <- sum(classConf$fragments[[c]]$int, na.rm = T)
              results <- rbind(results, data.frame(ID, Class, CDB, FAcomp, mz,
                                                   RT, int, Adducts, ppm,
                                                   confidenceLevel,
                                                   peakID = candidates$peakID[c],
                                                   Score = round(score, 3),
                                                   ScoreInt = round(scoreInt/sum(coelfrags[[c]]$int), 3),
                                                   stringsAsFactors = F))
            }
          }
        }
      }
      if (!is.vector(results)){
        keep <- rep(NA, nrow(results))
        for (r in 1:nrow(results)){
          if (is.na(keep[r])){
            dup <- which(results$mz == results$mz[r] & results$RT == results$RT[r])
            dup <- dup[dup >= r]
            if (length(dup) > 1){
              rep <- duplicated(lapply(sapply(results$FAcomp[dup], strsplit, " "), sort))
              rep <- dup[which(rep)]
              all <- c(r, rep)
              tokeep <- all[which.max(LipidMS::confLevels[
                results$confidenceLevel[all], "order"])]
              if (results$confidenceLevel[tokeep] == "FA position"){
                tokeep <- all[which(results$confidenceLevel[all] ==
                                      results$confidenceLevel[tokeep])]
              }
              keep[tokeep] <- TRUE
              keep[setdiff(all, tokeep)] <- FALSE
            } else {
              keep[r] <- TRUE
            }
          }
        }
        results <- results[keep,]
        results <- unique(results)
      }
    }
  }
  if (is.vector(results)){
    results <- data.frame()
  }
  #===========================#
  # exceptions in lipid names
  #===========================#
  # ceramides
  results$ID <- gsub("^Cer\\(", "Cer\\(d", results$ID)
  # CerP
  results$ID <- gsub("^CerP\\(", "CerP\\(d", results$ID)
  # SM
  results$ID <- gsub("^SM\\(", "SM\\(d", results$ID)
  # acylcer
  acylcer <- grep("AcylCer", results$ID)
  if (length(acylcer) > 0){
    for (a in 1:length(acylcer)){
      chains <- unlist(strsplit(results$ID[acylcer[a]], "[\\/_]"))
      if (length(chains) == 3){
        chains[2] <- paste("d", chains[2], sep="")
        if (results$confidenceLevel[acylcer[a]] == "FA"){
          results$ID[acylcer[a]] <- paste(paste(chains[1], chains[2], sep="-"), 
                                          chains[3], sep = "_")
        } else if (results$confidenceLevel[acylcer[a]] == "FA position"){
          results$ID[acylcer[a]] <- paste(paste(chains[1], chains[2], sep="-"), 
                                          chains[3], sep = "/")
        }
      }
    }
  }
  return(results)
}

# crossTables
#' Cross the original MS1 peaklist with the annotation results
#'
#' Cross the original MS1 peaklist with the annotation results.
#'
#' @param msobject annotated msobject
#' @param ppm mass tolerance in ppm.
#' @param rttol rt tolerance to match peaks in seconds.
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return msobject with an annotatedPeaklist, which is a data frame with 6 
#' columns: mz, RT, int, LipidMSid, adduct and confidence level for the 
#' annotation. When multiple IDs are proposed for the same feature, they are 
#' sorted based on the annotation level and score.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
crossTables <- function(msobject,
                        ppm = 5,
                        rttol = 10,
                        dbs){
  
  MS1 <- msobject$peaklist$MS1
  results <- msobject$annotation$results
  
  if (nrow(results) > 0){
    # load dbs
    if (missing(dbs)){
      dbs <- assignDB()
    }
    adductsTable <- dbs$adductsTable
    confLevels <- LipidMS::confLevels
    
    # reorder results based on the annotation degree
    results <- results[order(factor(results$confidenceLevel,
                                    levels= c("FA position", "FA",
                                              "Subclass", "MSMS",
                                              "MS-only")), 
                             -results$ScoreInt, 
                             -results$Score),]
    
    # Neutral mass for lipids in the results table
    Form_Mn <- do.call(rbind, apply(results, 1, getFormula, dbs = dbs))
    Mn <- Form_Mn[,"Mn"]
    
    # vectors to save annotations
    lipidMSid <- rep("", nrow(MS1))
    lipidMSadduct <- rep("", nrow(MS1))
    lipidMSlevel <- rep("", nrow(MS1))
    lipidMSscore <- rep("", nrow(MS1))
    lipidMSscoreInt <- rep("", nrow(MS1))
    
    # Go over results and add unique annotations to the previous vectors 
    for (r in 1:nrow(results)){
      adducts <- unlist(strsplit(as.character(results$Adducts)[r], ";"))
      mzs <- vector()
      for (a in 1:length(adducts)){
        adinfo <- adductsTable[adductsTable$adduct == adducts[a],]
        mz <- (adinfo$n*Mn[r]+adinfo$mdif)/abs(adinfo$charge)
        mzs <- append(mzs, mz)
      }
      if (length(mzs) > 0){
        for (m in 1:length(mzs)){
          if (results$peakID[r] != "" && m == 1){
            rows <- which(MS1$peakID == results$peakID[r])
            matches <- c(1:length(rows))
          } else {
            if (sum(abs(MS1$RT- as.numeric(results$RT[r])) <= rttol) > 0){
              rows <- which(abs(MS1$RT - as.numeric(results$RT[r])) <= rttol)
              matches <- as.numeric(unlist(sapply(mzs[m], mzMatch, MS1$mz[rows], ppm)))
              if (length(matches) > 0){
                matches <- matches[seq(1, length(matches), 2)]
              } else {
                matches <- vector()
              }
            } else {
              matches <- vector()
            }
          }
          if (length(matches) > 0){
            for (i in 1:(length(matches))){
              # if no id has been assigned yet
              if (lipidMSid[rows[matches[i]]] == ""){
                
                lipidMSid[rows[matches[i]]] <- as.character(results$ID[r])
                lipidMSlevel[rows[matches[i]]] <- as.character(results$confidenceLevel[r])
                lipidMSadduct[rows[matches[i]]] <- as.character(adducts[m])
                lipidMSscore[rows[matches[i]]] <- as.character(results$Score[r])
                lipidMSscoreInt[rows[matches[i]]] <- as.character(results$ScoreInt[r])
                
                add <- FALSE
                
                # if some id has been assigned but it is different to the new one 
              } else if (lipidMSid[rows[matches[i]]] != "" &&
                         !grepl(results$ID[r],
                                lipidMSid[rows[matches[i]]], fixed = T)){
                newid <- chains(results$ID[r])
                oldids <- sapply(unlist(strsplit(lipidMSid[rows[matches[i]]], "\\|")), 
                                 chains, simplify = FALSE)
                
                if (length(newid) > 3){
                  if (!any(unlist(lapply(oldids, function(x) 
                    all(x[3:length(x)] %in% newid[3:length(newid)]))))){
                    add <- TRUE
                  }
                } else {
                  if (!any(unlist(lapply(oldids, function(x) 
                    all(sumChains(x[3:length(x)], length(x)-2) %in% newid[3:length(newid)])))) | 
                    !any(unlist(lapply(oldids, function(x) 
                      all(x[3:length(x)] %in% sumChains(newid[3:length(newid)], length(newid)-2)))))){
                    add <- TRUE
                  } else {
                    add <- FALSE
                  }
                }
                if (add){
                  # finally, if the new id has to be added, sort all ids based on
                  # annotation level and coelution score
                  conf <- confLevels[confLevels$level == results$confidenceLevel[r], "order"]
                  conf2 <- unlist(strsplit(lipidMSlevel[rows[matches[i]]], "\\|"))
                  conf2 <- sapply(conf2, function(x) confLevels[confLevels$level == x,"order"])
                  score <- results$Score[r]
                  score2 <- as.numeric(unlist(strsplit(lipidMSscore[rows[matches[i]]], "\\|")))
                  scoreInt <- results$ScoreInt[r]
                  scoreInt2 <- as.numeric(unlist(strsplit(lipidMSscoreInt[rows[matches[i]]], "\\|")))
                  
                  ord <- order(c(conf, conf2), c(scoreInt, scoreInt2), 
                               c(score, score2), decreasing = TRUE)
                  
                  if (length(ord) == 0){
                    # if anything goes wrong add the new id in at the last position
                    lipidMSid[rows[matches[i]]] <-
                      paste(lipidMSid[rows[matches[i]]],
                            as.character(results$ID[r]), sep="|")
                    lipidMSlevel[rows[matches[i]]] <-
                      paste(lipidMSlevel[rows[matches[i]]],
                            as.character(results$confidenceLevel[r]), sep="|")
                    lipidMSadduct[rows[matches[i]]] <-
                      paste(as.character(lipidMSadduct[rows[matches[i]]]),
                            as.character(adducts[m]), sep="|")
                    lipidMSscore[rows[matches[i]]] <-
                      paste(lipidMSscore[rows[matches[i]]],
                            as.character(results$Score[r]), sep="|")
                    lipidMSscoreInt[rows[matches[i]]] <-
                      paste(lipidMSscoreInt[rows[matches[i]]],
                            as.character(results$ScoreInt[r]), sep="|")
                  } else {
                    # else, add the new id and reorder
                    ids <- c(results$ID[r],
                             unlist(strsplit(as.character(lipidMSid[rows[matches[i]]]), "\\|")))
                    conflev <- c(results$confidenceLevel[r],
                                 unlist(strsplit(as.character(lipidMSlevel[rows[matches[i]]]), "\\|")))
                    adduc <- c(adducts[m],
                               unlist(strsplit(as.character(lipidMSadduct[rows[matches[i]]]), "\\|")))
                    score <- c(results$Score[r],
                               unlist(strsplit(as.character(lipidMSscore[rows[matches[i]]]), "\\|")))
                    scoreInt <- c(results$ScoreInt[r],
                               unlist(strsplit(as.character(lipidMSscoreInt[rows[matches[i]]]), "\\|")))
                    lipidMSid[rows[matches[i]]] <- paste(ids[ord], collapse="|")
                    lipidMSlevel[rows[matches[i]]] <- paste(conflev[ord], collapse="|")
                    lipidMSadduct[rows[matches[i]]] <- paste(adduc[ord], collapse="|")
                    lipidMSscore[rows[matches[i]]] <- paste(score[ord], collapse="|")
                    lipidMSscoreInt[rows[matches[i]]] <- paste(scoreInt[ord], collapse="|")
                  }
                }
              }
            }
          }
        }
      }
    }
    # Join info and return
    rawPeaks <- data.frame(MS1, 
                           LipidMSid = lipidMSid, 
                           Adduct = lipidMSadduct,
                           confidenceLevel = lipidMSlevel,
                           Score = lipidMSscore,
                           ScoreInt = lipidMSscoreInt)
  } else {
    rawPeaks <- data.frame(MS1, 
                           LipidMSid = "", 
                           Adduct = "",
                           confidenceLevel = "",
                           Score = "",
                           ScoreInt = "")
  }
  msobject$annotation$annotatedPeaklist <- rawPeaks
  
  return(msobject)
}

Try the LipidMS package in your browser

Any scripts or data that you put into this service are public.

LipidMS documentation built on May 29, 2024, 11:24 a.m.