MS DDA or DIA Data

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

# plotLipids
#' Plot informative peaks for lipid annotation
#'
#' Plot informative peaks for each lipid annotated with idPOS and idNEG (or
#' similar functions).
#'
#' @param msobject annotated msobject.
#' @param span smoothing parameter. Numeric value between 0 and 1.
#' @param ppm mz tolerance for EIC. If set to 0, the EIC will not be shown.
#' @param verbose print information messages.
#'
#' @return msobject with a plots element which contains a list of plots.
#' Plots on the left side represent raw values while plots on the left are
#' smoothed or clean scans (MS2 in DDA).
#'
#' @details Peak intensities are relative to the maximum intensity of each peak
#' to ease visualization.
#' 
#' Grey lines show the the extracted ion chromatograms for the peaks.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
plotLipids <- function(msobject, span = 0.4, ppm = 10, verbose = TRUE){

  ##############################################################################
  # Check arguments and inputs
  if (!"results" %in% names(msobject$annotation)){
    stop("No results to be plotted")
  }
  if (span < 0 | span > 1){
    span <- 0.4
    warning("span parameter set to 0.4")
  }
  if ("plots" %in% names(msobject$annotation)){
    if(verbose){cat("\n Removing previous plots...")}
    msobject$annotation$plots <- NULL
    if(verbose){cat("OK")}
  }
  results <- msobject$annotation$results[order(msobject$annotation$results$mz),]
  msobject$annotation$plots <- list()
  ##############################################################################
  # For each lipid in the results data frame, extract peaks
  while (nrow(results) > 0){
    r <- results$peakID[1]
    rclass <- results$Class[1]
    ############################################################################
    # MS1 peaks
    toremove <- which(results$peakID == r & results$Class == rclass)
    parent <- results[results$peakID == r & results$Class == rclass,]
    if (nrow(parent) > 1){
      parent$ID[1] <- paste(parent$ID, collapse = "|")
      parent$peakID[1] <- paste(unique(parent$peakID), collapse = "|")
      parent <- parent[1,]
    }
    adducts <- unlist(strsplit(parent$Adducts, ";"))
    ms1 <- c()
    for (a in adducts){
      ss <- msobject$annotation$detailsAnnotation[[parent$Class]]$candidates
      ss <- ss[grepl(gsub("+", "\\+", as.character(paste("^", a, sep="")), fixed = TRUE),
                     msobject$annotation$detailsAnnotation[[parent$Class]]$candidates$adducts),]
      ss <- ss[ss$cb == parent$CDB,]
      ss <- ss[order(abs(ss$RT - parent$RT), decreasing = FALSE),]
      ms1 <- rbind(ms1, ss[1,])
    }
    ms1$adducts <- as.character(sapply(ms1$adducts, function(x) unlist(strsplit(x, ";"))[[1]]))
    peaksMS1 <- ms1$peakID
    mzpeaksMS1 <- ms1$mz # to use in case data is DDA
    namesMS1 <- paste(as.character(round(ms1$mz, 3)), ms1$adducts, sep="_")
    
    # extract EIC for each MS1 peak
    eic <- list()
    for (m in 1:length(mzpeaksMS1)){
        eic[[m]] <- msobject$rawData$MS1[abs(msobject$rawData$MS1$mz - mzpeaksMS1[m])*1e6/mzpeaksMS1[m] <= ppm,, drop = FALSE]
        if (nrow(eic[[m]]) > 0){
          eic[[m]] <- eic[[m]][order(eic[[m]]$RT), c("RT", "int")]
        } else {
          eic[[m]] <- data.frame()
      }
    }
    
    ############################################################################
    # MS2 peaks
    peaksMS2 <- c()
    scansMS2 <- c() # to use in case data is DDA
    namesMS2 <- c()
    mzpeaksMS2 <- c() # to use in case data is DIA
    class <- c()
    chains <- c()

    # get index for all adducts (from candidates df)
    c <- which(msobject$annotation$detailsAnnotation[[parent$Class]]$candidates$peakID %in% ms1$peakID)

    # extract class fragments
    if ("classfragments" %in% names(msobject$annotation$detailsAnnotation[[parent$Class]])){
      class <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$classfragments[c])
      if (length(class) > 0){
        if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
          peaksMS2 <- c(peaksMS2, class$peakID)
          mzpeaksMS2 <- c(mzpeaksMS2, class$mz)
        } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
          peaksMS2 <- c(peaksMS2, class$mz)
          scansMS2 <- c(scansMS2, class$peakID)
        }
        namesMS2 <- c(namesMS2, paste(as.character(round(class$mz, 3)), "class fragment", sep="_"))
      }
    }

    # extract chain fragments
    if ("chainfragments" %in% names(msobject$annotation$detailsAnnotation[[parent$Class]])){
      if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
        for (i in c){
          ch <- c()
          if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]]) > 0){
            ch <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]])
            if (length(ch) > 0){
              chains <- rbind(chains, ch)
              ch <- c()
            }
          }
        }
        if (length(chains) > 0){
          peaksMS2 <- c(peaksMS2, chains$peakID)
          namesMS2 <- c(namesMS2, paste(as.character(round(chains$mz, 3)), chains$db, chains$cb, chains$adduct, sep="_"))
          mzpeaksMS2 <- c(mzpeaksMS2, chains$mz)
        }
      } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
        for (i in c){
          ch <- c()
          if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]]) > 0){
            ch <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]])
            if (length(ch) > 0){
              chains <- rbind(chains, ch)
              ch <- c()
            }
          }
        }
        if (length(chains) > 0){
          chains <- unique(chains)
          chains <- chains[chains$mz != 0,]
          peaksMS2 <- c(peaksMS2, chains$mz)
          scansMS2 <- c(scansMS2, chains$peakID)
          namesMS2 <- c(namesMS2, paste(as.character(round(chains$mz, 3)), chains$db, chains$cb, chains$adduct, sep="_"))
        }
      }
    }
    
    # remove duplicates
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      mzpeaksMS2 <- mzpeaksMS2[peaksMS2 != ""]
    } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      scansMS2 <- scansMS2[peaksMS2 != ""]
    }
    namesMS2 <- namesMS2[peaksMS2 != ""]
    peaksMS2 <- peaksMS2[peaksMS2 != ""]
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      mzpeaksMS2 <- mzpeaksMS2[!duplicated(peaksMS2)]
    } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      scansMS2 <- scansMS2[!duplicated(peaksMS2)]
    }
    namesMS2 <- namesMS2[!duplicated(peaksMS2)]
    peaksMS2 <- peaksMS2[!duplicated(peaksMS2)]
    
    # if data is DIA, extract EIC for each MS2 peak
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      eic2 <- list()
      if (length(mzpeaksMS2) > 0){
        for (m in 1:length(mzpeaksMS2)){
          eic2[[m]] <- msobject$rawData$MS2[abs(msobject$rawData$MS2$mz - mzpeaksMS2[m])*1e6/mzpeaksMS2[m] <= ppm,, drop = FALSE]
          if (nrow(eic2[[m]]) > 0){
            eic2[[m]] <- eic2[[m]][order(eic2[[m]]$RT), c("RT", "int")]
          } else {
            eic2[[m]] <- data.frame()
          }
        }
      }
    }
    
    # if data is DDA, extract spectrum data
    if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      rawMS <- c()
      for (i in c){
        f <- c()
        if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$coelfrags[[i]]) > 0){
          f <- msobject$annotation$detailsAnnotation[[parent$Class]]$coelfrags[[i]]
          if (length(f) > 0){
            rawMS <- rbind(rawMS, f)
            f <- c()
          }
        }
      }
    }

    ############################################################################
    # Plot
    
    # save par parameters
    oldpar <- graphics::par(no.readonly = TRUE, new = FALSE)
    on.exit(graphics::par(oldpar))
    
    colorsMS1 <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", "#FE6900", 
                   grDevices::colors()[!grepl("white|gr[e|a]y", 
                                              grDevices::colors())])
    colorsMS2 <- c("#7F8E39", "#5F3659", "#E5C616", "#16A08CFF", "#628395",
                   "#C5D86D", "#969696FF", "#358359FF", "#9F4D23FF", 
                   "#D86C4FFF", "#170C2EFF", "#473B75FF", "#F19C1FFF",
                   "#117733", "#DDCC77", "#CC6677", "#88CCEE",
                   "#44AA99", "#332288", "#AA4499", "#999933",
                   "#882255", "#661100", "#6699CC", "#888888", 
                   grDevices::colors()[!grepl("white|gr[e|a]y", 
                                              grDevices::colors())])

    if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      nplots <- 1 + length(unique(scansMS2))
    } else {
      nplots <- 2
    }
    grDevices::pdf(NULL) # use a pdf NULL device to save plots to an object
    grDevices::dev.control(displaylist = "enable")
    graphics::par(mfrow=c(nplots, 2), mar = c(3,4,4,1), mgp=c(2,1,0), bg = "white")
    # plot MS1 info
    if (length(peaksMS1) > 0){
      ssms1 <- msobject$rawData$MS1[msobject$rawData$MS1$peakID %in% peaksMS1,]
      ssms1 <- ssms1[order(ssms1$RT),]
      minrt1 <- min(ssms1$RT)
      maxrt1 <- max(ssms1$RT)
      ints <- c()
      for (p in 1:length(peaksMS1)){
        toplot <- msobject$rawData$MS1[msobject$rawData$MS1$peakID == peaksMS1[p],]
        toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
        maxint <- max(toplot$int, na.rm=TRUE)
        ints <- append(ints, maxint)
        toplot$int <- toplot$int*100/maxint
        if (p == 1){
          if (nrow(toplot) > 1){
            plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                             as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
            eic[[1]] <- eic[[1]][eic[[1]]$RT <= maxrt1+10 & eic[[1]]$RT >= minrt1-5,]
            graphics::lines(x=eic[[1]]$RT, y=eic[[1]]$int*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5)
          } else {
            plot(0, 0, type = "n", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
          }
          
        } else {
          if (nrow(toplot) > 1){
            graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS1[p], 0.8), lwd = 2.5)
            eic[[p]] <- eic[[p]][eic[[p]]$RT <= maxrt1+10 & eic[[p]]$RT >= minrt1-5,]
            graphics::lines(eic[[p]]$RT, eic[[p]]$int*100/maxint, 
                          col = scales::alpha("grey", 0.4), lwd = 2.5)
          }
        }
      }
      graphics::legend("topright", legend=namesMS1,
             col=colorsMS1[1:length(peaksMS1)], lty=1, lwd = 2, cex=0.6)
      graphics::legend("bottomright", title = "Max. intensity",
             legend=formatC(ints, format = "e", digits = 2),
             col=colorsMS1[1:length(peaksMS1)], lty=1, lwd = 2, cex=0.6)

      # smoothed
      for (p in 1:length(peaksMS1)){
        toplot <- msobject$rawData$MS1[msobject$rawData$MS1$peakID == peaksMS1[p],]
        toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
        pred <- tryCatch({stats::predict(stats::smooth.spline(toplot$RT, toplot$int, spar = span),
                                  x = toplot$RT)},
                         error = function(e) {return(list(x = toplot$RT,
                                                          y = toplot$int))})
        toplot$RT <- pred$x
        toplot$int <- pred$y
        maxint <- max(toplot$int, na.rm=TRUE)
        toplot$int <- toplot$int*100/maxint
        if (p == 1){
          if (nrow(toplot) > 1){
            plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
            smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[1]]$RT, 
                                                                 eic[[1]]$int, 
                                                                 spar = span),
                                            x = eic[[1]]$RT)},
                            error = function(e) {return(list(x = eic[[1]]$RT,
                                                             y = eic[[1]]$int))})
            graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            
          } else {
            plot(0, 0, type = "n", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
          }
          
        } else {
          if (nrow(toplot) > 1){
            graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS1[p], 0.8), lwd = 2.5,
                            lty = 5)
            smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[p]]$RT, 
                                                                 eic[[p]]$int, 
                                                                 spar = span),
                                            x = eic[[p]]$RT)},
                            error = function(e) {return(list(x = eic[[p]]$RT,
                                                             y = eic[[p]]$int))})
            graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
          }
        }
      }
      graphics::legend("topright", legend=namesMS1,
             col=colorsMS1[1:length(peaksMS1)], lty = 5, lwd = 2, cex=0.6)
      graphics::legend("bottomright", title = "Max. intensity",
             legend=formatC(ints, format = "e", digits = 2),
             col=colorsMS1[1:length(peaksMS1)], lty = 5, lwd = 2, cex=0.6)
    }

    # plot MS2 info
    if (length(peaksMS2) > 0){
      # if data is DIA
      if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
        ssms2 <- msobject$rawData$MS2[msobject$rawData$MS2$peakID %in% peaksMS2,]
        minrt2 <- min(ssms2$RT)
        maxrt2 <- max(ssms2$RT)
        maxint2 <- max(ssms2$int)
        ints2 <- c()
        for (p in 1:length(peaksMS2)){
          toplot <- msobject$rawData$MS2[msobject$rawData$MS2$peakID == peaksMS2[p],]
          toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
          maxint <- max(toplot$int, na.rm=TRUE)
          ints2 <- append(ints2, maxint)
          toplot$int <- toplot$int*100/maxint
          if (p == 1){
            if (nrow(toplot) > 1){
              plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
              eic2[[1]] <- eic2[[1]][eic2[[1]]$RT <= maxrt1+10 & eic2[[1]]$RT >= minrt1-5,]
              graphics::lines(x=eic2[[1]]$RT, y=eic2[[1]]$int*100/maxint, 
                              col = scales::alpha("grey", 0.4), lwd = 2.5)
            } else {
              plot(0, 0, type = "n", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
            }
            
          } else {
            if (nrow(toplot) > 1){
              graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS2[p], 0.8), lwd = 2.5)
              eic2[[p]] <- eic2[[p]][eic2[[p]]$RT <= maxrt1+10 & eic2[[p]]$RT >= minrt1-5,]
              graphics::lines(x=eic2[[p]]$RT, y=eic2[[p]]$int*100/maxint, 
                              col = scales::alpha("grey", 0.4), lwd = 2.5)
            }
          }
        }
        graphics::legend("topright", legend=namesMS2,
               col=colorsMS2[1:length(peaksMS2)], lty=1, lwd = 2, cex=0.6)
        graphics::legend("bottomright", title = "Max. intensity",
               legend=formatC(ints2, format = "e", digits = 2),
               col=colorsMS2[1:length(peaksMS2)], lty=1, lwd = 2, cex=0.6)

        # smoothed
        for (p in 1:length(peaksMS2)){
          toplot <- msobject$rawData$MS2[msobject$rawData$MS2$peakID == peaksMS2[p],]
          toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
          maxint <- max(toplot$int, na.rm=TRUE)
          pred <- tryCatch({stats::predict(stats::smooth.spline(toplot$RT, toplot$int, spar = span),
                                    x = toplot$RT)},
                           error = function(e) {return(list(x = toplot$RT,
                                                            y = toplot$int))})
          toplot$RT <- pred$x
          toplot$int <- pred$y
          toplot$int <- toplot$int*100/maxint
          if (p == 1){
            if (nrow(toplot) > 1){
              plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS2[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                             as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
              smt <- tryCatch({stats::predict(stats::smooth.spline(eic2[[1]]$RT, 
                                                                 eic2[[1]]$int, 
                                                                 spar = span),
                                            x = eic2[[1]]$RT)},
                            error = function(e) {return(list(x = eic2[[1]]$RT,
                                                             y = eic2[[1]]$int))})
              graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            } else {
              plot(0, 0, type = "n", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
            }
            
          } else {
            if (nrow(toplot) > 1){
              graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS2[p], 0.8), lwd = 2.5,
                              lty = 5)
              smt <- tryCatch({stats::predict(stats::smooth.spline(eic2[[p]]$RT, 
                                                                   eic2[[p]]$int, 
                                                                   spar = span),
                                              x = eic2[[p]]$RT)},
                              error = function(e) {return(list(x = eic2[[p]]$RT,
                                                               y = eic2[[p]]$int))})
              graphics::lines(x=smt$x, y=smt$y*100/maxint,  
                              col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            }
          }
        }
        graphics::legend("topright", legend=namesMS2,
               col=colorsMS2[1:length(peaksMS2)], lty = 5, lwd = 2, cex=0.6)
        graphics::legend("bottomright", title = "Max. intensity",
               legend=formatC(ints2, format = "e", digits = 2),
               col=colorsMS2[1:length(peaksMS2)], lty = 5, lwd = 2, cex=0.6)
        # if data is DDA
      } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
        # for each scan
        for (s in unique(scansMS2)){
          # subset raw data
          ssrawMS <- rawMS[rawMS$peakID == s,]
          ssmaxrawMS <- max(ssrawMS$int)
          ssrawMS$int <- ssrawMS$int*100/max(ssrawMS$int)
          ssrawMS$int[ssrawMS$int < 2] <- ssrawMS$int[ssrawMS$int < 2] + 2 # to improve visualization
          mz2 <- peaksMS2[scansMS2 == s]
          namesmz2 <- namesMS2[scansMS2 == s]
          namesmz2 <- namesmz2[order(mz2, decreasing = FALSE)]
          mz2 <- mz2[order(mz2, decreasing = FALSE)]
          
          # assign colors
          ssrawMS$color <- "black"
          ssrawMS$color[ssrawMS$mz %in% mz2] <- colorsMS2[1:sum(ssrawMS$mz %in% mz2)]

          # Find precursor in the MS/MS spectrum
          scanprec <- unlist(strsplit(s, "_"))
          collisionenergy <- as.numeric(scanprec[2])
          scanprec <- as.numeric(scanprec[3])
          precursor <- msobject$metaData$scansMetadata$precursor[
            which(msobject$metaData$scansMetadata$msLevel == 2 &
                    msobject$metaData$scansMetadata$collisionEnergy == collisionenergy)[scanprec]]
          prec <- as.numeric(unlist(sapply(precursor, mzMatch, ssrawMS$mz, ppm = 10)))
          if (length(prec) > 0){
            minppm <- which.min(prec[seq(2, length(prec), 2)])
            prec <- prec[seq(1, length(prec), 2)][minppm]
            mzprec <- ssrawMS$mz[prec]
            nameprec <- paste(round(mzprec, 3), "_precursor", sep="")
            if (ssrawMS$color[prec] == "black"){
              ssrawMS$color[prec] <- colorsMS1[1]
              namesmz2 <- c(namesmz2, nameprec)
            }
          }
          colors2 <- ssrawMS$color[ssrawMS$color != "black"]
          blacks <- ssrawMS$color == "black"
          
          #plot
          plot(ssrawMS$mz[blacks], ssrawMS$int[blacks], type = "h", 
               col = scales::alpha(ssrawMS$color[blacks], 0.7),
               xlim = c(0, max(ssrawMS$mz)+20), ylim = c(0, 132),
               lwd = 1, ylab = "Rel. Intensity", xlab = "m/z",
               main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                           as.character(round(parent$RT, 1)), sep="_"),
                            paste("\nPrecursor: ", round(precursor, 3), sep = ""),
                            sep = ""),
               las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 1, yaxt = "n" )
          lines(ssrawMS$mz[!blacks], ssrawMS$int[!blacks], type = "h", 
               col = scales::alpha(ssrawMS$color[!blacks], 1))
          graphics::axis(2,at=seq(2, 122, 20), labels = seq(0, 120, 20))
          graphics::legend("topright", legend=namesmz2,
                 col=colors2, lty = 1, lwd = 2, cex=0.6)

          # clean
          ssrawMSclean <- ssrawMS[!blacks,]
          plot(ssrawMSclean$mz, ssrawMSclean$int, type = "h",
               col = scales::alpha(ssrawMSclean$color, 1),
               xlim = c(0, max(ssrawMS$mz)+20), ylim = c(0, 132),
               lwd = 1.5, ylab = "Rel. Intensity", xlab = "m/z",
               main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                           as.character(round(parent$RT, 1)), sep="_"),
                            paste("\nPrecursor: ", round(precursor, 3), sep = ""),
                            sep = ""),
               las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 1, yaxt = "n" )
          graphics::axis(2,at=seq(2, 102, 20), labels = seq(0, 100, 20))
          graphics::legend("topright", legend=namesmz2,
                 col=colors2, lty = 1, lwd = 2, cex=0.6)
        }
      }
    }
    msobject$annotation$plots[[r]] <- grDevices::recordPlot() # save plot
    invisible(grDevices::dev.off()) # close pdf NULL device
    results <- results[-toremove,]
  }
  return(msobject)
}

# plotticmsbatch
#' TIC for all samples in a msbatch
#'
#' TIC for all samples in a msbatch
#'
#' @param msbatch msbatch
#' @param rt numeric vector with the RT range to be plotted
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata). 
#'
#' @return plot
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
plotticmsbatch <- function(msbatch, rt, colorbygroup = TRUE){
  if (missing(rt)){
    maxs <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime))
    mins <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$startTime))
    rt <- c(min(mins), max(maxs))
  }
  maxint <- max(unlist(lapply(msbatch$msobjects, function(x) x$metaData$scansMetadata$totIonCurrent))) 
  
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  if (length(msbatch$msobjects) > length(palette)){
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = T)]
    colors <- sample(colors, size = (length(msbatch$msobjects) - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- 1:length(msbatch$msobjects)
    legendnames <- msbatch$metaData$sample
  }
  
  # plot tics (if there is MS1 and MS2, use just MS1)
  whichmslevel <- lapply(msbatch$msobjects, function(x) 
    unique(x$metaData$scansMetadata$msLevel))
  x <- msbatch$msobjects[[1]]$metaData$scansMetadata$RT
  y <- msbatch$msobjects[[1]]$metaData$scansMetadata$totIonCurrent
  if (all(c(1, 2) %in% whichmslevel[[1]])){
    x <- x[which(msbatch$msobjects[[1]]$metaData$scansMetadata$msLevel == 1)]
    y <- y[msbatch$msobjects[[1]]$metaData$scansMetadata$msLevel == 1]
  }
  plot(x = x, y = y,
       xlim = rt, ylim = c(0, maxint+maxint*0.1), 
       type = "l", col = scales::alpha(palette[samplescolor[1]], 0.7),
       main = "Total Ion Chromatograms", xlab = "RT (sec)", ylab = "Intensity")
  
  for (i in 2:length(msbatch$msobjects)){
    x <- msbatch$msobjects[[i]]$metaData$scansMetadata$RT
    y <- msbatch$msobjects[[i]]$metaData$scansMetadata$totIonCurrent
    if (all(c(1, 2) %in% whichmslevel[[i]])){
      x <- x[msbatch$msobjects[[i]]$metaData$scansMetadata$msLevel == 1]
      y <- y[msbatch$msobjects[[i]]$metaData$scansMetadata$msLevel == 1]
    }
    lines(x = x, y = y,
          type = "l", col = scales::alpha(palette[samplescolor[i]], 0.7))
  }
  legend("topright", legend = legendnames, 
         col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
}

# ploteicmsbatch
#' EIC for all samples in a msbatch
#'
#' EIC for all samples in a msbatch
#'
#' @param msbatch msbatch
#' @param mz mz of interest
#' @param ppm mass tolerance in ppm
#' @param rt numeric vector with the RT range to be plotted
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata).
#' @param verbose print information messages.
#'
#' @return plot
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
ploteicmsbatch <- function(msbatch, 
                           mz, 
                           ppm, 
                           rt, 
                           colorbygroup = TRUE, 
                           verbose = TRUE){
  if (missing(rt)){
    maxs <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime))
    rt <- c(0, max(maxs))
  }
  # extract eics
  eic <- list()
  for (s in 1:length(msbatch$msobjects)){
    e <- msbatch$msobjects[[s]]$rawData$MS1[abs(msbatch$msobjects[[s]]$rawData$MS1$mz - mz)*1e6/mz < ppm,, drop = FALSE]
    e <- e[e$RT >= rt[1] & e$RT <= rt[2],]
    if (nrow(e) > 0){
      eic[[s]] <- e[order(e$RT), c("RT", "int")]
    } else {
      eic[[s]] <- data.frame()
    }
    
  }
  maxint <- max(unlist(lapply(eic, function(x) if(nrow(x) > 0){max(x$int, na.rm = TRUE)} else {0})), na.rm = TRUE) 
  
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  if (length(msbatch$msobjects) > length(palette)){
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = T)]
    colors <- sample(colors, size = (length(msbatch$msobjects) - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- 1:length(msbatch$msobjects)
    legendnames <- msbatch$metaData$sample
  }
  
  # plot eics
  startat <- 1
  start <- FALSE
  while (!start){
    if (nrow(eic[[startat]]) > 0){
      smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[startat]]$RT,
                                                           eic[[startat]]$int,
                                                           spar = 0.05),
                                      x = eic[[startat]]$RT)},
                      error = function(e) {return(list(x = eic[[startat]]$RT,
                                                       y = eic[[startat]]$int))})
      smt$y[smt$y < 0] <- 0
      
      plot(smt$x, smt$y, xlim = rt, 
           ylim = c(0, maxint+maxint*0.1), type = "l", lwd = 2,
           col = scales::alpha(palette[samplescolor[startat]], 0.7), 
           main = paste("EIC: ", mz, sep=""), 
           xlab = "RT (sec)", ylab = "Intensity")
      start <- TRUE
    } else if (nrow(eic[[startat]]) == 0 & startat < length(msbatch$msobjects)) {
      startat <- startat + 1
    } else {
      if(verbose){cat("No peaks found")}
      plot(0, xlim = rt, ylim = c(0, 100), type = "l", lwd = 2,
           col = scales::alpha(palette[samplescolor[1]], 0.7), 
           main = paste("EIC: ", mz, sep=""), 
           xlab = "RT (sec)", ylab = "Intensity")
      legend("topright", legend = msbatch$metaData$sample, 
             col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
      break
    }
  }
  if (length(eic) > startat){
    if (start){
      for (i in (startat+1):length(msbatch$msobjects)){
        if (nrow(eic[[i]] > 0)){
          smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[i]]$RT,
                                                               eic[[i]]$int,
                                                               spar = 0.05),
                                          x = eic[[i]]$RT)},
                          error = function(e) {return(list(x = eic[[i]]$RT,
                                                           y = eic[[i]]$int))})
          smt$y[smt$y < 0] <- 0
          
          lines(smt$x, smt$y, xlim = rt, type = "l", lwd = 2, 
                col = scales::alpha(palette[samplescolor[i]], 0.7), 
                main = paste("EIC: ", mz, sep=""), 
                xlab = "RT (sec)", ylab = "Intensity")
        }
      }
      legend("topright", legend = legendnames, 
             col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
    }
  }
}

# rtdevplot
#' Plot retention time deviation
#'
#' Plot retention time deviation of an aligned msbatch
#'
#' @param msbatch aligned msbatch.
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata).
#'
#' @return plot
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
rtdevplot <- function(msbatch, colorbygroup = TRUE){
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  if (length(msbatch$msobjects) > length(palette)){
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = T)]
    colors <- sample(colors, size = (length(msbatch$msobjects) - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- 1:length(msbatch$msobjects)
    legendnames <- msbatch$metaData$sample
  }
  
  maxdev <- max(unlist(lapply(msbatch$alignment$rtdevcorrected, function(x) x$RTdev)))
  mindev <- min(unlist(lapply(msbatch$alignment$rtdevcorrected, function(x) x$RTdev)))
  xmax <- max(unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime)))
  
  # remove zeros at the start and the end of the rtdev vector
  x <- msbatch$alignment$rtdevcorrected[[1]]$RT
  y <- msbatch$alignment$rtdevcorrected[[1]]$RTdev
  x <- x[cumsum(y) & rev(cumsum(rev(y)))]
  y <- y[cumsum(y) & rev(cumsum(rev(y)))]
  plot(x = x, y = y, 
       xlim = c(0, xmax+xmax/10), ylim = c(mindev-1, maxdev+1), type = "l", 
       col = scales::alpha(palette[samplescolor[1]], 0.7), lwd = 2, 
       main = "Retention time deviation", xlab = "RT (sec)", ylab = "RT dev (sec)")
  lines(x = x, y = rep(0, length(x)), col = "darkgrey", lwd = 1, lty = 2)
  for (s in 2:length(msbatch$msobjects)){
    x <- msbatch$alignment$rtdevcorrected[[s]]$RT
    y <- msbatch$alignment$rtdevcorrected[[s]]$RTdev
    x <- x[cumsum(y) & rev(cumsum(rev(y)))]
    y <- y[cumsum(y) & rev(cumsum(rev(y)))]
    lines(x = x, y = y, col = scales::alpha(palette[samplescolor[s]], 0.7))
  }
  legend("topright", legend = legendnames, 
         col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
}

# createLipidDB
#' Customizable lipid DBs creator
#'
#' It allows to create easy-customizable lipid DBs for annotation with LipidMS
#' package.
#'
#' @param lipid character value indicating the class of lipid. See Details.
#' @param chains character vector indicating the FA chains to be employed
#' @param chains2 character vector containing the sphingoid bases to be employed
#' if required.
#'
#' @return List with the requested dbs (data frames)
#'
#' @details \code{lipidClass} argument needs to be one of the following
#' character values: "Cer", "CerP", "GlcCer", "SM", "Carnitine", "CE", "FA",
#' "HFA", "Sph" (sphingoid bases), "SphP", "MG", "LPA", , "LPC",
#' "LPE", "LPG", "LPI", "LPS", "FAHFA", "DG", "PC", "PE", "PG", "PI", "PS",
#' "PA", "TG", "CL" or "all".
#'
#' @examples
#' fas <- c("8:0", "10:0", "12:0", "14:0", "14:1", "15:0", "16:0", "16:1",
#' "17:0", "18:0", "18:1", "18:2", "18:3", "18:4", "20:0", "20:1", "20:2",
#' "20:3", "20:4", "20:5", "22:0", "22:1", "22:2", "22:3", "22:4", "22:5",
#' "22:6", "24:0", "24:1", "26:0")
#' sph <- c("16:0", "16:1", "18:0", "18:1")
#' newdb <- createLipidDB(lipid = "PC", chains = fas, chains2 = sph)
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
createLipidDB <- function(lipid, chains, chains2){
  customizedDataSets <- list()
  if (sum(lipid == "Cer" || lipid == "CerP" || lipid == "GlcCer" ||
          lipid == "SM" || lipid == "AcylCer") == 1){
    db <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = lipid)
    db <- data.frame(formula=db$formula, total=db$total,
                     Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                     stringsAsFactors = F)
    if (lipid == "Cer"){
      customizedDataSets[["cerdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                  Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "CerP"){
      customizedDataSets[["cerPdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "GlcCer"){
      customizedDataSets[["glccerdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "SM"){
      customizedDataSets[["smdb"]] <- data.frame(formula=db$formula,
                                                 total=db$total, Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "AcylCer"){
      customizedDataSets[["acylcerdb"]] <- data.frame(formula=db$formula,
                                                 total=db$total, Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
  } else if (sum(lipid == "FA" || lipid == "HFA" || lipid == "Carnitine" ||
                 lipid == "LPA" || lipid == "LPE" || lipid == "LPG" ||
                 lipid == "LPI" || lipid == "LPS" || lipid == "LPC" ||
                 lipid == "MG" || lipid == "CE" || lipid == "Sph" ||
                 lipid == "SphP" || lipid == "LPEo" || lipid == "LPAo" ||
                 lipid == "LPCp" || lipid == "LPCo" || lipid == "LPEp") == 1){
    if (lipid %in% c("Sph", "SphP")){
      db <- dbOneChain(chains = chains2, lipid = lipid)
      db <- data.frame(formula=db$formula, total=db$total,
                       Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                       stringsAsFactors = F)
    } else {
      db <- dbOneChain(chains = chains, lipid = lipid)
      db <- data.frame(formula=db$formula, total=db$total,
                       Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                       stringsAsFactors = F)
    }
    if (lipid == "FA"){
      customizedDataSets[["fadb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "HFA"){
      customizedDataSets[["hfadb"]] <- data.frame(formula=db$formula, total=db$total,
                                                  Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "Carnitine"){
      customizedDataSets[["carnitinedb"]] <- data.frame(formula=db$formula, total=db$total,
                                                        Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPA"){
      customizedDataSets[["lysopadb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPE"){
      customizedDataSets[["lysopedb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPG"){
      customizedDataSets[["lysopgdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPI"){
      customizedDataSets[["lysopidb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPS"){
      customizedDataSets[["lysopsdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPC"){
      customizedDataSets[["lysopcdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                     Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "MG"){
      customizedDataSets[["mgdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "CE"){
      customizedDataSets[["CEdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "Sph"){
      customizedDataSets[["sphdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                  Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "SphP"){
      customizedDataSets[["sphPdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPEo"){
      customizedDataSets[["lysopeodb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPAo"){
      customizedDataSets[["lysopaodb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPCp"){
      customizedDataSets[["lysopcpdb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPCo"){
      customizedDataSets[["lysopcodb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "LPEp"){
      customizedDataSets[["lysopepdb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
  } else if (sum(lipid == "DG" || lipid == "PC" || lipid == "PE" ||
                 lipid == "PG" || lipid == "PI" || lipid == "PS" || lipid == "PIP" ||
                 lipid == "PIP2" ||lipid == "PIP3" || lipid == "FAHFA" ||
                 lipid == "PA") == 1 || lipid == "PEo" || lipid == "PCp" ||
             lipid == "PCo" || lipid == "PEp"){
    db <- dbTwoChains(chains = chains, lipid = lipid)
    db <- data.frame(formula=db$formula, total=db$total,
                     Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                     stringsAsFactors = F)
    if (lipid == "FAHFA"){
      customizedDataSets[["fahfadb"]] <- data.frame(formula=db$formula, total=db$total,
                                                    Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "DG"){
      customizedDataSets[["dgdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PE"){
      customizedDataSets[["pedb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PG"){
      customizedDataSets[["pgdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PI"){
      customizedDataSets[["pidb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PIP"){
      customizedDataSets[["pipdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                  Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PIP2"){
      customizedDataSets[["pip2db"]] <- data.frame(formula=db$formula, total=db$total,
                                                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PIP3"){
      customizedDataSets[["pip3db"]] <- data.frame(formula=db$formula, total=db$total,
                                                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PS"){
      customizedDataSets[["psdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PC"){
      customizedDataSets[["pcdb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PA"){
      customizedDataSets[["padb"]] <- data.frame(formula=db$formula, total=db$total,
                                                 Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PEo"){
      customizedDataSets[["peodb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PCp"){
      customizedDataSets[["pcpdb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PCo"){
      customizedDataSets[["pcodb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
    if (lipid == "PEp"){
      customizedDataSets[["pepdb"]] <-
        data.frame(formula=db$formula, total=db$total,
                   Mass=as.numeric(db$Mass), stringsAsFactors = F)
    }
  } else if (lipid == "TG") {
    db <- dbThreeChains(chains = chains, lipid = lipid)
    db <- data.frame(formula=db$formula, total=db$total,
                     Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                     stringsAsFactors = F)
    customizedDataSets[["tgdb"]] <- data.frame(formula=db$formula, total=db$total,
                                               Mass=as.numeric(db$Mass), stringsAsFactors = F)
  } else if (lipid == "CL") {
    db <- dbFourChains(chains = chains, lipid = lipid)
    db <- data.frame(formula=db$formula, total=db$total,
                     Mass=as.numeric(db$Mass), ID = paste(lipid, "(", db$total, ")", sep=""),
                     stringsAsFactors = F)
    customizedDataSets[["cldb"]] <- data.frame(formula=db$formula, total=db$total,
                                               Mass=as.numeric(db$Mass), stringsAsFactors = F)
  } else if (lipid == "all"){
    ceramides <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = "Cer")
    ceramides <- data.frame(formula=ceramides$formula, total=ceramides$total,
                            Mass=as.numeric(ceramides$Mass), ID = paste("Cer(", ceramides$total,
                                                                        ")", sep=""), stringsAsFactors = F)
    
    ceramidesP <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = "CerP")
    ceramidesP <- data.frame(formula=ceramidesP$formula, total=ceramidesP$total,
                             Mass=as.numeric(ceramidesP$Mass), ID = paste("CerP(", ceramidesP$total,
                                                                          ")", sep=""), stringsAsFactors = F)
    
    acylcer <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = "AcylCer")
    acylcer <- data.frame(formula=acylcer$formula, total=acylcer$total, 
                          Mass=as.numeric(acylcer$Mass), ID = paste("AcylCer(", acylcer$total,
                                                                    ")", sep=""), stringsAsFactors = F)
    
    glccer <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = "GlcCer")
    glccer <- data.frame(formula=glccer$formula, total=glccer$total,
                         Mass=as.numeric(glccer$Mass), ID = paste("GlcCer(", glccer$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    
    sm <- dbSphingolipids(chains = chains, chains2 = chains2, lipid = "SM")
    sm <- data.frame(formula=sm$formula, total=sm$total,
                     Mass=as.numeric(sm$Mass), ID = paste("SM(", sm$total,
                                                          ")", sep=""), stringsAsFactors = F)
    fa <- dbOneChain(chains = chains, lipid = "FA")
    fa <- data.frame(formula=fa$formula, total=fa$total,
                     Mass=as.numeric(fa$Mass), ID = paste("FA(", fa$total,
                                                          ")", sep=""), stringsAsFactors = F)
    hfa <- dbOneChain(chains = chains, lipid = "HFA")
    hfa <- data.frame(formula=hfa$formula, total=hfa$total,
                      Mass=as.numeric(hfa$Mass), ID = paste("HFA(", hfa$total,
                                                            ")", sep=""), stringsAsFactors = F)
    carnitine <- dbOneChain(chains = chains, lipid = "Carnitine")
    carnitine <- data.frame(formula=carnitine$formula, total=carnitine$total,
                            Mass=as.numeric(carnitine$Mass), ID = paste("Carnitine(", carnitine$total,
                                                                        ")", sep=""), stringsAsFactors = F)
    CE <- dbOneChain(chains = chains, lipid = "CE")
    CE <- data.frame(formula=CE$formula, total=CE$total,
                     Mass=as.numeric(CE$Mass), ID = paste("CE(", CE$total,
                                                          ")", sep=""), stringsAsFactors = F)
    mg <- dbOneChain(chains = chains, lipid = "MG")
    mg <- data.frame(formula=mg$formula, total=mg$total,
                     Mass=as.numeric(mg$Mass), ID = paste("MG(", mg$total,
                                                          ")", sep=""), stringsAsFactors = F)
    sph <- dbOneChain(chains = chains2, lipid = "Sph")
    sph <- data.frame(formula=sph$formula, total=sph$total,
                      Mass=as.numeric(sph$Mass), ID = paste("Sph(", sph$total,
                                                            ")", sep=""), stringsAsFactors = F)
    nlsph <- sph[,1:3]
    nlsph$Mass <- nlsph$Mass - 44.05
    
    sphP <- dbOneChain(chains = chains2, lipid = "SphP")
    sphP <- data.frame(formula=sphP$formula, total=sphP$total,
                       Mass=as.numeric(sphP$Mass), ID = paste("SphP(", sphP$total,
                                                              ")", sep=""), stringsAsFactors = F)
    lysopc <- dbOneChain(chains = chains, lipid = "LPC")
    lysopc <- data.frame(formula=lysopc$formula, total=lysopc$total,
                         Mass=as.numeric(lysopc$Mass), ID = paste("LPC(", lysopc$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    lysope <- dbOneChain(chains = chains, lipid = "LPE")
    lysope <- data.frame(formula=lysope$formula, total=lysope$total,
                         Mass=as.numeric(lysope$Mass), ID = paste("LPE(", lysope$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    lysopg <- dbOneChain(chains = chains, lipid = "LPG")
    lysopg <- data.frame(formula=lysopg$formula, total=lysopg$total,
                         Mass=as.numeric(lysopg$Mass), ID = paste("LPG(", lysopg$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    lysopi <- dbOneChain(chains = chains, lipid = "LPI")
    lysopi <- data.frame(formula=lysopi$formula, total=lysopi$total,
                         Mass=as.numeric(lysopi$Mass), ID = paste("LPI(", lysopi$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    lysops <- dbOneChain(chains = chains, lipid = "LPS")
    lysops <- data.frame(formula=lysops$formula, total=lysops$total,
                         Mass=as.numeric(lysops$Mass), ID = paste("LPS(", lysops$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    lysopa <- dbOneChain(chains = chains, lipid = "LPA")
    lysopa <- data.frame(formula=lysopa$formula, total=lysopa$total,
                         Mass=as.numeric(lysopa$Mass), ID = paste("LPA(", lysopa$total,
                                                                  ")", sep=""), stringsAsFactors = F)
    pc <- dbTwoChains(chains = chains, lipid = "PC")
    pc <- data.frame(formula=pc$formula, total=pc$total,
                     Mass=as.numeric(pc$Mass), ID = paste("PC(", pc$total,
                                                          ")", sep=""), stringsAsFactors = F)
    pe <- dbTwoChains(chains = chains, lipid = "PE")
    pe <- data.frame(formula=pe$formula, total=pe$total,
                     Mass=as.numeric(pe$Mass), ID = paste("PE(", pe$total,
                                                          ")", sep=""), stringsAsFactors = F)
    pg <- dbTwoChains(chains = chains, lipid = "PG")
    pg <- data.frame(formula=pg$formula, total=pg$total,
                     Mass=as.numeric(pg$Mass), ID = paste("PG(", pg$total,
                                                          ")", sep=""), stringsAsFactors = F)
    pi <- dbTwoChains(chains = chains, lipid = "PI")
    pi <- data.frame(formula=pi$formula, total=pi$total,
                     Mass=as.numeric(pi$Mass), ID = paste("PI(", pi$total,
                                                          ")", sep=""), stringsAsFactors = F)
    pip <- dbTwoChains(chains = chains, lipid = "PIP")
    pip <- data.frame(formula=pip$formula, total=pip$total,
                      Mass=as.numeric(pip$Mass), ID = paste("PIP(", pip$total,
                                                            ")", sep=""), stringsAsFactors = F)
    pip2 <- dbTwoChains(chains = chains, lipid = "PIP2")
    pip2 <- data.frame(formula=pip2$formula, total=pip2$total,
                       Mass=as.numeric(pip2$Mass), ID = paste("PIP2(", pip2$total,
                                                              ")", sep=""), stringsAsFactors = F)
    pip3 <- dbTwoChains(chains = chains, lipid = "PIP3")
    pip3 <- data.frame(formula=pip3$formula, total=pip3$total,
                       Mass=as.numeric(pip3$Mass), ID = paste("PIP3(", pip3$total,
                                                              ")", sep=""), stringsAsFactors = F)
    ps <- dbTwoChains(chains = chains, lipid = "PS")
    ps <- data.frame(formula=ps$formula, total=ps$total,
                     Mass=as.numeric(ps$Mass), ID = paste("PS(", ps$total,
                                                          ")", sep=""), stringsAsFactors = F)
    pa <- dbTwoChains(chains = chains, lipid = "PA")
    pa <- data.frame(formula=pa$formula, total=pa$total,
                     Mass=as.numeric(pa$Mass), ID = paste("PA(", pa$total,
                                                          ")", sep=""), stringsAsFactors = F)
    fahfa <- dbTwoChains(chains = chains, lipid = "FAHFA")
    fahfa <- data.frame(formula=fahfa$formula, total=fahfa$total,
                        Mass=as.numeric(fahfa$Mass), ID = paste("FAHFA(", fahfa$total,
                                                                ")", sep=""), stringsAsFactors = F)
    dg <- dbTwoChains(chains = chains, lipid = "DG")
    dg <- data.frame(formula=dg$formula, total=dg$total,
                     Mass=as.numeric(dg$Mass), ID = paste("DG(", dg$total,
                                                          ")", sep=""), stringsAsFactors = F)
    tg <- dbThreeChains(chains = chains, lipid = "TG")
    tg <- data.frame(formula=tg$formula, total=tg$total,
                     Mass=as.numeric(tg$Mass), ID = paste("TG(", tg$total,
                                                          ")", sep=""), stringsAsFactors = F)
    cl <- dbFourChains(chains = chains, lipid = "CL")
    cl <- data.frame(formula=cl$formula, total=cl$total,
                     Mass=as.numeric(cl$Mass), ID = paste("CL(", cl$total,
                                                          ")", sep=""), stringsAsFactors = F)
    peo <- dbTwoChains(chains = chains, lipid = "PEo")
    peo <- data.frame(formula=peo$formula, total=peo$total,
                      Mass=as.numeric(peo$Mass),
                      ID = paste("PEo(", peo$total, ")", sep=""),
                      stringsAsFactors = F)
    lysopeo <- dbOneChain(chains = chains, lipid = "LPEo")
    lysopeo <- data.frame(formula=lysopeo$formula, total=lysopeo$total,
                          Mass=as.numeric(lysopeo$Mass),
                          ID = paste("LPEo(", lysopeo$total, ")", sep=""),
                          stringsAsFactors = F)
    lysopao <- dbOneChain(chains = chains, lipid = "LPAo")
    lysopao <- data.frame(formula=lysopao$formula, total=lysopao$total,
                          Mass=as.numeric(lysopao$Mass),
                          ID = paste("LPAo(", lysopao$total, ")", sep=""),
                          stringsAsFactors = F)
    pcp <- dbTwoChains(chains = chains, lipid = "PCp")
    pcp <- data.frame(formula=pcp$formula, total=pcp$total,
                      Mass=as.numeric(pcp$Mass),
                      ID = paste("PCp(", pcp$total, ")", sep=""),
                      stringsAsFactors = F)
    lysopcp <- dbOneChain(chains = chains, lipid = "LPCp")
    lysopcp <- data.frame(formula=lysopcp$formula, total=lysopcp$total,
                          Mass=as.numeric(lysopcp$Mass),
                          ID = paste("LPCp(", lysopcp$total, ")", sep=""),
                          stringsAsFactors = F)
    pco <- dbTwoChains(chains = chains, lipid = "PCo")
    pco <- data.frame(formula=pco$formula, total=pco$total,
                      Mass=as.numeric(pco$Mass),
                      ID = paste("PCo(", pco$total, ")", sep=""),
                      stringsAsFactors = F)
    lysopco <- dbOneChain(chains = chains, lipid = "LPCo")
    lysopco <- data.frame(formula=lysopco$formula, total=lysopco$total,
                          Mass=as.numeric(lysopco$Mass),
                          ID = paste("LPCo(", lysopco$total, ")", sep=""),
                          stringsAsFactors = F)
    pep <- dbTwoChains(chains = chains, lipid = "PEp")
    pep <- data.frame(formula=pep$formula, total=pep$total,
                      Mass=as.numeric(pep$Mass),
                      ID = paste("PEp(", pco$total, ")", sep=""),
                      stringsAsFactors = F)
    lysopep <- dbOneChain(chains = chains, lipid = "LPEp")
    lysopep <- data.frame(formula=lysopep$formula, total=lysopep$total,
                          Mass=as.numeric(lysopep$Mass),
                          ID = paste("LPEp(", lysopep$total, ")", sep=""),
                          stringsAsFactors = F)
    
    
    customizedDataSets[["cerdb"]] <- ceramides
    customizedDataSets[["cerPdb"]] <- ceramidesP
    customizedDataSets[["acylcerdb"]] <- acylcer
    customizedDataSets[["glccerdb"]] <- glccer
    customizedDataSets[["smdb"]] <- sm
    customizedDataSets[["fadb"]] <- fa
    customizedDataSets[["hfadb"]] <- hfa
    customizedDataSets[["carnitinedb"]] <- carnitine
    customizedDataSets[["lysopadb"]] <- lysopa
    customizedDataSets[["lysopedb"]] <- lysope
    customizedDataSets[["lysopgdb"]] <- lysopg
    customizedDataSets[["lysopidb"]] <- lysopi
    customizedDataSets[["lysopsdb"]] <- lysops
    customizedDataSets[["lysopcdb"]] <- lysopc
    customizedDataSets[["mgdb"]] <- mg
    customizedDataSets[["CEdb"]] <- CE
    customizedDataSets[["sphdb"]] <- sph
    customizedDataSets[["sphPdb"]] <- sphP
    customizedDataSets[["fahfadb"]] <- fahfa
    customizedDataSets[["pedb"]] <- pe
    customizedDataSets[["pgdb"]] <- pg
    customizedDataSets[["pidb"]] <- pi
    customizedDataSets[["pipdb"]] <- pip
    customizedDataSets[["pip2db"]] <- pip2
    customizedDataSets[["pip3db"]] <- pip3
    customizedDataSets[["psdb"]] <- ps
    customizedDataSets[["padb"]] <- pa
    customizedDataSets[["pcdb"]] <- pc
    customizedDataSets[["dgdb"]] <- dg
    customizedDataSets[["tgdb"]] <- tg
    customizedDataSets[["cldb"]] <- cl
    customizedDataSets[["peodb"]] <- peo
    customizedDataSets[["lysopeodb"]] <- lysopeo
    customizedDataSets[["lysopaodb"]] <- lysopao
    customizedDataSets[["pcpdb"]] <- pcp
    customizedDataSets[["lysopcpdb"]] <- lysopcp
    customizedDataSets[["pcodb"]] <- pco
    customizedDataSets[["lysopcodb"]] <- lysopco
    customizedDataSets[["pepdb"]] <- pep
    customizedDataSets[["lysopepdb"]] <- lysopep
    customizedDataSets[["nlsphdb"]] <- nlsph
    customizedDataSets[["badb"]] <- LipidMS::badb
    customizedDataSets[["baconjdb"]] <- LipidMS::baconjdb
    customizedDataSets[["adductsTable"]] <- LipidMS::adductsTable
  }
  return(customizedDataSets)
}

# assignDB
#' Load LipidMS default data bases
#'
#' load all LipidMS default data bases required to run identification functions.
#'
#' @return list of data frames
#'
#' @examples
#' \dontrun{
#' dbs <- assignDB()
#' }
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
assignDB <- function(){
  dbs <- list()
  dbs[["cerdb"]] <- LipidMS::cerdb
  dbs[["cerPdb"]] <- LipidMS::cerPdb
  dbs[["acylcerdb"]] <- LipidMS::acylcerdb
  dbs[["smdb"]] <- LipidMS::smdb
  dbs[["fadb"]] <- LipidMS::fadb
  dbs[["hfadb"]] <- LipidMS::hfadb
  dbs[["carnitinedb"]] <- LipidMS::carnitinesdb
  dbs[["lysopadb"]] <- LipidMS::lysopadb
  dbs[["lysopaodb"]] <- LipidMS::lysopaodb
  dbs[["lysopedb"]] <- LipidMS::lysopedb
  dbs[["lysopeodb"]] <- LipidMS::lysopeodb
  dbs[["lysopepdb"]] <- LipidMS::lysopepdb
  dbs[["lysopgdb"]] <- LipidMS::lysopgdb
  dbs[["lysopidb"]] <- LipidMS::lysopidb
  dbs[["lysopsdb"]] <- LipidMS::lysopsdb
  dbs[["lysopcdb"]] <- LipidMS::lysopcdb
  dbs[["lysopcodb"]] <- LipidMS::lysopcodb
  dbs[["lysopcpdb"]] <- LipidMS::lysopcpdb
  dbs[["mgdb"]] <- LipidMS::mgdb
  dbs[["CEdb"]] <- LipidMS::CEdb
  dbs[["sphdb"]] <- LipidMS::sphdb
  dbs[["sphPdb"]] <- LipidMS::sphPdb
  dbs[["fahfadb"]] <- LipidMS::fahfadb
  dbs[["pcdb"]] <- LipidMS::pcdb
  dbs[["pcodb"]] <- LipidMS::pcodb
  dbs[["pcpdb"]] <- LipidMS::pcpdb
  dbs[["pedb"]] <- LipidMS::pedb
  dbs[["peodb"]] <- LipidMS::peodb
  dbs[["pepdb"]] <- LipidMS::pepdb
  dbs[["pgdb"]] <- LipidMS::pgdb
  dbs[["pidb"]] <- LipidMS::pidb
  dbs[["psdb"]] <- LipidMS::psdb
  dbs[["padb"]] <- LipidMS::padb
  dbs[["dgdb"]] <- LipidMS::dgdb
  dbs[["tgdb"]] <- LipidMS::tgdb
  dbs[["cldb"]] <- LipidMS::cldb
  dbs[["badb"]] <- LipidMS::badb
  dbs[["baconjdb"]] <- LipidMS::baconjdb
  dbs[["nlsphdb"]] <- LipidMS::nlsphdb
  dbs[["adductsTable"]] <- LipidMS::adductsTable
  return(dbs)
}

# getInclusionList
#' Obtain an inclusion list from the annotation results
#'
#' Obtain an inclusion list for the identified lipids.
#'
#' @param df data frame. Output of identification functions (results table from
#' an msobject or feature table from an msbatch).
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return Data frame with 6 columns: formula, RT, neutral mass, m/z, adduct
#' and the LipidMSid.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
getInclusionList <- function(df, dbs){
  
  if (missing(dbs)){
    dbs <- assignDB()
  }
  adductsTable = dbs$adductsTable
  
  if(!any(c("LipidMSid", "ID") %in% colnames(df))){
    stop("df must be annotated (i.e. results table from an msobject or feature table from an msbatch)")
  }
  
  results <- data.frame(mz = df$mz, RT = df$RT, Adduct = df$Adduct)
  if ("LipidMSid" %in% colnames(df)){
    results$LipidMSid <- unlist(sapply(df$LipidMSid, function(x){
      id <- unlist(strsplit(x, "[\\|;]"))[1]
      if (is.na(id)){id <- ""}
      return(id)
    }))
    results <- results[results$LipidMSid != "",]
    comp <- do.call(rbind, sapply(results$LipidMSid, function(x){
      y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
      names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
      if (is.na(y["chains"])){
        ch <- y[3:6]
        n <- sum(!is.na(ch))
        ch <- paste(ch[1:n], sep="", collapse=" ")
        y["chains"] <- sumChains(ch, n)
      }
      return(y)
    }, simplify = FALSE))
    results$Class <- comp[,"class"]
    results$CDB <- comp[,"chains"]
  } else {
    results$Adduct <- unlist(sapply(df$Adduct, function(x){
      adduct <- unlist(strsplit(x, "\\;"))[1]
      if (is.na(adduct)){adduct <- ""}
      return(adduct)
    }))
    results$LipidMSid <- df$ID
    results$Class <- df$Class
    results$CDB <- df$CDB
  }
  
  Form_Mn <- apply(results, 1, getFormula, dbs = dbs)
  if (is.matrix(Form_Mn)){
    new <- list()
    for (i in 1:ncol(Form_Mn)){
      new[[i]] <- c(Form_Mn[1,i], Form_Mn[2,i])
    }
    Form_Mn <- new
  }
  na <- which(unlist(lapply(Form_Mn, length)) == 0)
  if (length(na) > 0){
    Form_Mn[[na]] <- c(NA, NA)
  }
  Formula <- unlist(lapply(Form_Mn, "[[", 1))
  RT <- results$RT
  RTminutes <- results$RTminutes
  Mn <- as.numeric(unlist(lapply(Form_Mn, "[[", 2)))
  adducts <- sapply(as.vector(results$Adduct), strsplit, ";")
  mzs <- rep(list(vector()), nrow(results))
  for (i in 1:nrow(results)){
    ad <- adducts[[i]]
    for (a in 1:length(ad)){
      adinfo <- adductsTable[adductsTable$adduct == ad[a],]
      mz <- (adinfo$n*Mn[i]+adinfo$mdif)/abs(adinfo$charge)
      mzs[[i]] <- append(mzs[[i]], mz)
    }
  }
  Name <- results$LipidMSid
  inclusionList <- vector()
  for (i in 1:nrow(results)){
    for (a in 1:length(adducts[[i]])){
      inclusionList <- rbind(inclusionList,
                             data.frame(Formula[i], RT[i], RTminutes[i], Mn[i],
                                        mzs[[i]][a], adducts[[i]][a],
                                        Name[i], stringsAsFactors = F))
    }
  }
  colnames(inclusionList) <- c("Formula", "RT", "RTminutes", "Mn", "mz", 
                               "Adduct", "LipidMSid")
  inclusionList <- unique(inclusionList)
  return(inclusionList)
}

# searchIsotopes
#' Targeted isotopes search
#'
#' This function uses annotation results of deisotoped data to search
#' for isotopes in raw data.
#'
#' @param msobject msobject.
#' @param label isotope employed for the experiment. It can be "13C" or "D".
#' @param adductsTable adducts table employed for lipids annotation.
#' @param ppm mass error tolerance.
#' @param coelCutoff coelution score threshold between isotopes. By default, 0.7.
#' @param results target list to search isotopes. If missing, all results from the
#' msobject are searched. It is used by \link{searchIsotopesmsbatch}.
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return List with the isotopes for each compound in the results data frame.
#' 
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
searchIsotopes <- function(msobject,
                           label,
                           adductsTable = LipidMS::adductsTable,
                           ppm = 10,
                           coelCutoff = 0.7,
                           results, 
                           dbs){
  
  if (missing(dbs)){
    dbs <- assignDB()
  }
  
  ############################################################################
  # Target and raw data
  if(missing(results)){
    # List of target compounds
    results <- data.frame(mz = msobject$annotation$annotatedPeaklist$mz, 
                          RT = msobject$annotation$annotatedPeaklist$RT,
                          iniRT = msobject$annotation$annotatedPeaklist$minRT,
                          endRT = msobject$annotation$annotatedPeaklist$maxRT)
    results$LipidMSid <- unlist(sapply(msobject$annotation$annotatedPeaklist$LipidMSid, function(x){
      id <- unlist(strsplit(x, "[\\|;]"))[1]
      if (is.na(id)){id <- ""}
      return(id)
    }))
    results$Adduct <- unlist(sapply(msobject$annotation$annotatedPeaklist$Adduct, function(x){
      adduct <- unlist(strsplit(x, "[\\|;]"))[1]
      if (is.na(adduct)){adduct <- ""}
      return(adduct)
    }))
    results <- results[results$LipidMSid != "",]
    comp <- do.call(rbind, sapply(results$LipidMSid, function(x){
      y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
      names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
      if (is.na(y["chains"])){
        ch <- y[3:6]
        n <- sum(!is.na(ch))
        ch <- paste(ch[1:n], sep="", collapse=" ")
        y["chains"] <- sumChains(ch, n)
      }
      return(y)
    }, simplify = FALSE))
    results$Class <- comp[,"class"]
    results$CDB <- comp[,"chains"]
    results <- cbind(results, msobject$annotation$annotatedPeaklist[msobject$annotation$annotatedPeaklist$LipidMSid != "", 
                                               colnames(msobject$annotation$annotatedPeaklist) %in% 
                                                 make.names(msobject$metaData$generalMetadata$file)])
  }
  MS1 <- msobject$rawData$MS1
  
  ############################################################################
  # Make an index for raw MS data to speed up the function
  drt <- max(MS1$RT) - min(MS1$RT)
  mz <- MS1$mz
  ord <- order(mz)
  mz <- mz[ord]
  rt <- MS1$RT[ord]
  int <- MS1$int[ord]
  scan <- MS1$Scan[ord]
  part <- .Call("agglom", mz, rt, as.integer(1),
                ppm*2, drt,
                PACKAGE = "LipidMS")
  part <- part[order(part, decreasing = FALSE)]
  maxit <- max(part)
  index <- .Call("indexed", as.integer(part), int, 0, max(int),
                 as.integer(maxit), PACKAGE = "LipidMS")
  index <- cbind(index, mz[index[,1]])
  inimz <- index[,4]
  
  ############################################################################
  # Extract formula and mass of the target compounds
  results$Adduct <- unlist(sapply(results$Adduct, function(x){
    unlist(strsplit(x, "\\;"))[1]
  }))
  Form_Mn <- apply(results, 1, getFormula, dbs = dbs)
  formula <- unlist(lapply(Form_Mn, "[[", "Formula"))
  results$Formula <- formula
  results$Mn <- unlist(lapply(Form_Mn, "[[", "Mn"))
  comp <- do.call(rbind, sapply(results$Formula, function(x) {
    c <- CHNOSZ::makeup(x)
    c <- c[c("C", "H", "N", "O", "P")]
    names(c) <- c("C", "H", "N", "O", "P")
    return(as.data.frame(c))
  }))
  colnames(comp) <- c("C", "H", "N", "O", "P")
  results$C <- comp[,"C"]
  results$H <- comp[,"H"]
  if (label == "13C"){
    massdiff <- 1.0033548
    label <- "C"
  } else if (label == "D"){
    massdiff <- 1.006277
    label <- "H"
  }
  
  ############################################################################
  # Search isotopes
  isotopes <- apply(results, 1, function(x){
    ##########################################################################
    # get rt limits
    minRT <- as.numeric(x["iniRT"])
    maxRT <- as.numeric(x["endRT"])
    exactmz <- adductsTable$n[adductsTable$adduct == x["Adduct"]] * 
      (as.numeric(x["Mn"]) + adductsTable$mdiff[adductsTable$adduct == x["Adduct"]]) / 
      abs(adductsTable$charge[adductsTable$adduct == x["Adduct"]])
    top <- as.numeric(x[label])
    intensities <- c()
    mzs <- c()
    rts <- c()
    subsetPrev <- data.frame()
    for (i in 0:top){
      ##########################################################################
      # get mz limits
      minmz <- exactmz - exactmz*ppm/1e6 + i*massdiff
      maxmz <- exactmz + exactmz*ppm/1e6 + i*massdiff
      m <- which(inimz < maxmz)
      if (length(m) > 0){
        m <- m[length(m)]
        start <- index[m, 1]
        end <- index[m, 2]
        subset <- (start:end)[mz[start:end] >= minmz & mz[start:end] <= maxmz & 
                                rt[start:end] >= minRT & rt[start:end] <= maxRT]
      }
      if (length(subset) > 0){
        subsetMS1 <- MS1[ord[subset],]
        if (nrow(subsetMS1) > 0){
          subsetMS1 <- subsetMS1[order(subsetMS1$RT),]
          subsetMS1$int <- subsetMS1$int - min(subsetMS1$int) # baseline substraction
          if (nrow(subsetPrev) > 0){
            merged <- merge(subsetPrev, subsetMS1, by = "Scan")
            if (nrow(merged) > 2){
              coelScore <- cor(merged$int.x, merged$int.y)
              if (!is.na(coelScore) & coelScore >= coelCutoff){
                subsetPrev <- subsetMS1
                intensities <- c(intensities, sum(subsetMS1$int))
                mzs <- c(mzs, mean(subsetMS1$mz))
                rts <- c(rts, subsetMS1$RT[which.max(subsetMS1$int)])
              } else {
                intensities <- c(intensities, 0)
                mzs <- c(mzs, exactmz + i*massdiff)
                rts <- c(rts, as.numeric(x["RT"]))
              }
            } else {
              intensities <- c(intensities, 0)
              mzs <- c(mzs, exactmz + i*massdiff)
              rts <- c(rts, as.numeric(x["RT"]))
            } 
          } else {
            subsetPrev <- subsetMS1
            intensities <- c(intensities, sum(subsetMS1$int))
            mzs <- c(mzs, mean(subsetMS1$mz))
            rts <- c(rts, subsetMS1$RT[which.max(subsetMS1$int)])
          }
        } else {
          intensities <- c(intensities, 0)
          mzs <- c(mzs, exactmz + i*massdiff)
          rts <- c(rts, as.numeric(x["RT"]))
        }
      } else {
        intensities <- c(intensities, 0)
        mzs <- c(mzs, exactmz + i*massdiff)
        rts <- c(rts, as.numeric(x["RT"]))
      }
    }
    return(data.frame(LipidMSid = x["LipidMSid"],
                      Isotope = paste("[M+", 0:top, "]", sep = ""),
                      Adduct = x["Adduct"],
                      Formula = x["Formula"],
                      mz = mzs, 
                      RT = rts,  
                      RTminutes = round(rts/60, 2),
                      int = intensities,
                      stringsAsFactors = F, row.names = 1:length(intensities)))
  })
  isotopes <- data.frame(do.call(rbind, isotopes))
  rownames(isotopes) <- 1:nrow(isotopes)
  return(isotopes)
}

# searchIsotopesmsbatch
#' Targeted isotopes search for msbatch
#'
#' This function uses annotation results of deisotoped data to search
#' for isotopes in raw data.
#'
#' @param msbatch annotated msbatch.
#' @param label isotope employed for the experiment. It can be "13C" or "D".
#' @param adductsTable adducts table employed for lipids annotation.
#' @param ppm mass error tolerance.
#' @param coelCutoff coelution score threshold between isotopes. By default, 0.7.
#'
#' @return List with the isotopes for each compound in the results data frame.
#' 
#' @examples
#' \dontrun{
#' msbatch <- batchProcessing(metadata = "metadata.csv", polarity = "positive")
#' msbatch <- alignmsbatch(msbatch)
#' msbatch <- groupmsbatch(msbatch)
#' msbatch <- annotatemsbatch(msbatch)
#' searchIsotopesmsbatch(msbatch, label = "13C")
#' }
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
searchIsotopesmsbatch <- function(msbatch,
                           label,
                           adductsTable = LipidMS::adductsTable,
                           ppm = 10,
                           coelCutoff = 0.7){
  
  # List of target compounds
  results <- data.frame(mz = msbatch$features$mz, 
                        RT = msbatch$features$RT,
                        iniRT = msbatch$features$iniRT,
                        endRT = msbatch$features$endRT,
                        RTminutes = msbatch$features$RTminutes)
  results$LipidMSid <- unlist(sapply(msbatch$features$LipidMSid, function(x){
    id <- unlist(strsplit(x, "[\\|;]"))[1]
    if (is.na(id)){id <- ""}
    return(id)
  }))
  results$Adduct <- unlist(sapply(msbatch$features$Adduct, function(x){
    adduct <- unlist(strsplit(x, "[\\|;]"))[1]
    if (is.na(adduct)){adduct <- ""}
    return(adduct)
  }))
  results <- results[results$LipidMSid != "",]
  comp <- do.call(rbind, sapply(results$LipidMSid, function(x){
    y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
    names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
    if (is.na(y["chains"])){
      ch <- y[3:6]
      n <- sum(!is.na(ch))
      ch <- paste(ch[1:n], sep="", collapse=" ")
      y["chains"] <- sumChains(ch, n)
    }
    return(y)
  }, simplify = FALSE))
  results$Class <- comp[,"class"]
  results$CDB <- comp[,"chains"]
  results <- cbind(results, msbatch$features[msbatch$features$LipidMSid != "", 
                                             colnames(msbatch$features) %in% 
                                               make.names(msbatch$metaData$sample)])
  
  # for each msobject in the msbatch
  for (m in 1:length(msbatch$msobjects)){
    results$int <- results[,colnames(results) == make.names(msbatch$metaData$sample[m])]
    iso <- searchIsotopes(msbatch$msobjects[[m]],
                          label = label,
                          adductsTable = adductsTable,
                          ppm = ppm,
                          coelCutoff = coelCutoff,
                          results = results)
    if (m == 1){
      isotopes <- iso
    } else {
      isotopes <- cbind(isotopes, iso$int)
    }
  }
  colnames(isotopes) <- c("LipidMSid", "Isotope", "Adduct", "Formula", "mz", 
                          "RT", "RTminutes",
                          msbatch$metaData$sample)
  
  return(isotopes)
}
Any scripts or data that you put into this service are public.
LipidMS documentation built on May 29, 2024, 11:24 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
LipidMS
Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R
In LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data

Defines functions searchIsotopesmsbatch searchIsotopes getInclusionList assignDB createLipidDB rtdevplot ploteicmsbatch plotticmsbatch plotLipids

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

Try the LipidMS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LipidMS Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R In LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data

Defines functions searchIsotopesmsbatch searchIsotopes getInclusionList assignDB createLipidDB rtdevplot ploteicmsbatch plotticmsbatch plotLipids

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

Try the LipidMS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LipidMS
Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R
In LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data