MS DDA or DIA Data

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

# plotLipids
#' Plot informative peaks for lipid annotation
#'
#' Plot informative peaks for each lipid annotated with idPOS and idNEG (or
#' similar functions).
#'
#' @param msobject annotated msobject.
#' @param span smoothing parameter. Numeric value between 0 and 1. Values
#' outside this range are replaced by 0.4 with a warning.
#' @param ppm mz tolerance for EIC. If set to 0, the EIC will not be shown.
#' @param verbose print information messages.
#'
#' @return msobject with a plots element (stored in msobject$annotation$plots)
#' which contains a list of recorded plots, one per annotated peak.
#' Plots on the left side represent raw values while plots on the right are
#' smoothed or clean scans (MS2 in DDA).
#'
#' @details Peak intensities are relative to the maximum intensity of each peak
#' to ease visualization.
#' 
#' Grey lines show the extracted ion chromatograms for the peaks.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
plotLipids <- function(msobject, span = 0.4, ppm = 10, verbose = TRUE){

  ##############################################################################
  # Check arguments and inputs
  if (!"results" %in% names(msobject$annotation)){
    stop("No results to be plotted")
  }
  # an out-of-range span is not an error: it falls back to the default value
  if (span < 0 | span > 1){
    span <- 0.4
    warning("span parameter set to 0.4")
  }
  # any plots saved by a previous call are discarded before new ones are built
  if ("plots" %in% names(msobject$annotation)){
    if(verbose){cat("\n Removing previous plots...")}
    msobject$annotation$plots <- NULL
    if(verbose){cat("OK")}
  }
  # work on a copy of the results table sorted by mz
  results <- msobject$annotation$results[order(msobject$annotation$results$mz),]
  msobject$annotation$plots <- list()
  ##############################################################################
  # For each lipid in the results data frame, extract peaks
  # Rows are consumed from `results`: each iteration handles every row that
  # shares the peakID and Class of the first row and then drops those rows
  # (see `toremove` at the end of the loop), so the loop ends when no row is left.
  while (nrow(results) > 0){
    r <- results$peakID[1]
    rclass <- results$Class[1]
    ############################################################################
    # MS1 peaks
    toremove <- which(results$peakID == r & results$Class == rclass)
    parent <- results[results$peakID == r & results$Class == rclass,]
    # several annotations for the same peak and class are collapsed into a
    # single row whose ID joins all the individual IDs with "|"
    if (nrow(parent) > 1){
      parent$ID[1] <- paste(parent$ID, collapse = "|")
      parent$peakID[1] <- paste(unique(parent$peakID), collapse = "|")
      parent <- parent[1,]
    }
    adducts <- unlist(strsplit(parent$Adducts, ";"))
    ms1 <- c()
    # for each adduct, keep the candidate of this class (same CDB) whose RT is
    # closest to the RT of the annotated lipid
    for (a in adducts){
      ss <- msobject$annotation$detailsAnnotation[[parent$Class]]$candidates
      # the adduct name is anchored at the start of the string and "+" is
      # escaped so that it is matched literally by the regular expression
      ss <- ss[grepl(gsub("+", "\\+", as.character(paste("^", a, sep="")), fixed = TRUE),
                     msobject$annotation$detailsAnnotation[[parent$Class]]$candidates$adducts),]
      ss <- ss[ss$cb == parent$CDB,]
      ss <- ss[order(abs(ss$RT - parent$RT), decreasing = FALSE),]
      ms1 <- rbind(ms1, ss[1,])
    }
    # keep only the first adduct name when a candidate lists several (";"-separated)
    ms1$adducts <- as.character(sapply(ms1$adducts, function(x) unlist(strsplit(x, ";"))[[1]]))
    peaksMS1 <- ms1$peakID
    mzpeaksMS1 <- ms1$mz # to use in case data is DDA
    namesMS1 <- paste(as.character(round(ms1$mz, 3)), ms1$adducts, sep="_")
    
    # extract EIC for each MS1 peak
    # (all raw MS1 points within `ppm` of the peak mz, sorted by RT; an empty
    # data frame is stored when nothing falls inside the tolerance)
    eic <- list()
    for (m in 1:length(mzpeaksMS1)){
        eic[[m]] <- msobject$rawData$MS1[abs(msobject$rawData$MS1$mz - mzpeaksMS1[m])*1e6/mzpeaksMS1[m] <= ppm,, drop = FALSE]
        if (nrow(eic[[m]]) > 0){
          eic[[m]] <- eic[[m]][order(eic[[m]]$RT), c("RT", "int")]
        } else {
          eic[[m]] <- data.frame()
      }
    }
    
    ############################################################################
    # MS2 peaks
    # In DIA, peaksMS2 holds fragment peakIDs and mzpeaksMS2 their mz values.
    # In DDA, peaksMS2 holds fragment mz values and scansMS2 the scan
    # identifiers taken from the peakID column of the fragment tables.
    peaksMS2 <- c()
    scansMS2 <- c() # to use in case data is DDA
    namesMS2 <- c()
    mzpeaksMS2 <- c() # to use in case data is DIA
    class <- c()
    chains <- c()

    # get index for all adducts (from candidates df)
    # note: `c` is a local vector of row indices from here on; calls such as
    # c(...) below still resolve to the base function because R skips
    # non-function bindings when looking up a function call
    c <- which(msobject$annotation$detailsAnnotation[[parent$Class]]$candidates$peakID %in% ms1$peakID)

    # extract class fragments
    if ("classfragments" %in% names(msobject$annotation$detailsAnnotation[[parent$Class]])){
      class <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$classfragments[c])
      if (length(class) > 0){
        if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
          peaksMS2 <- c(peaksMS2, class$peakID)
          mzpeaksMS2 <- c(mzpeaksMS2, class$mz)
        } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
          peaksMS2 <- c(peaksMS2, class$mz)
          scansMS2 <- c(scansMS2, class$peakID)
        }
        namesMS2 <- c(namesMS2, paste(as.character(round(class$mz, 3)), "class fragment", sep="_"))
      }
    }

    # extract chain fragments
    # (chainfragments[[i]] is a list of data frames that is row-bound into a
    # single table per candidate and accumulated in `chains`)
    if ("chainfragments" %in% names(msobject$annotation$detailsAnnotation[[parent$Class]])){
      if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
        for (i in c){
          ch <- c()
          if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]]) > 0){
            ch <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]])
            if (length(ch) > 0){
              chains <- rbind(chains, ch)
              ch <- c()
            }
          }
        }
        if (length(chains) > 0){
          peaksMS2 <- c(peaksMS2, chains$peakID)
          namesMS2 <- c(namesMS2, paste(as.character(round(chains$mz, 3)), chains$db, chains$cb, chains$adduct, sep="_"))
          mzpeaksMS2 <- c(mzpeaksMS2, chains$mz)
        }
      } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
        for (i in c){
          ch <- c()
          if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]]) > 0){
            ch <- do.call(rbind, msobject$annotation$detailsAnnotation[[parent$Class]]$chainfragments[[i]])
            if (length(ch) > 0){
              chains <- rbind(chains, ch)
              ch <- c()
            }
          }
        }
        if (length(chains) > 0){
          # in DDA, repeated rows and rows with mz equal to 0 are discarded
          chains <- unique(chains)
          chains <- chains[chains$mz != 0,]
          peaksMS2 <- c(peaksMS2, chains$mz)
          scansMS2 <- c(scansMS2, chains$peakID)
          namesMS2 <- c(namesMS2, paste(as.character(round(chains$mz, 3)), chains$db, chains$cb, chains$adduct, sep="_"))
        }
      }
    }
    
    # remove duplicates
    # First drop empty entries, then duplicated ones. The companion vectors
    # (mzpeaksMS2/scansMS2/namesMS2) are filtered before peaksMS2 in each step
    # because their filter is computed from peaksMS2, which must still be intact.
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      mzpeaksMS2 <- mzpeaksMS2[peaksMS2 != ""]
    } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      scansMS2 <- scansMS2[peaksMS2 != ""]
    }
    namesMS2 <- namesMS2[peaksMS2 != ""]
    peaksMS2 <- peaksMS2[peaksMS2 != ""]
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      mzpeaksMS2 <- mzpeaksMS2[!duplicated(peaksMS2)]
    } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      scansMS2 <- scansMS2[!duplicated(peaksMS2)]
    }
    namesMS2 <- namesMS2[!duplicated(peaksMS2)]
    peaksMS2 <- peaksMS2[!duplicated(peaksMS2)]
    
    # if data is DIA, extract EIC for each MS2 peak
    if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
      eic2 <- list()
      if (length(mzpeaksMS2) > 0){
        for (m in 1:length(mzpeaksMS2)){
          eic2[[m]] <- msobject$rawData$MS2[abs(msobject$rawData$MS2$mz - mzpeaksMS2[m])*1e6/mzpeaksMS2[m] <= ppm,, drop = FALSE]
          if (nrow(eic2[[m]]) > 0){
            eic2[[m]] <- eic2[[m]][order(eic2[[m]]$RT), c("RT", "int")]
          } else {
            eic2[[m]] <- data.frame()
          }
        }
      }
    }
    
    # if data is DDA, extract spectrum data
    # (the coelfrags tables of all selected candidates are stacked in rawMS)
    if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      rawMS <- c()
      for (i in c){
        f <- c()
        if (length(msobject$annotation$detailsAnnotation[[parent$Class]]$coelfrags[[i]]) > 0){
          f <- msobject$annotation$detailsAnnotation[[parent$Class]]$coelfrags[[i]]
          if (length(f) > 0){
            rawMS <- rbind(rawMS, f)
            f <- c()
          }
        }
      }
    }

    ############################################################################
    # Plot
    
    # save par parameters
    # NOTE(review): par() is called here with `new = FALSE` in addition to
    # no.readonly, so it presumably sets `new` and returns only its previous
    # value instead of the full parameter list - confirm this is intended.
    # on.exit() is called without add = TRUE on every iteration, so only the
    # handler registered in the last iteration is kept.
    oldpar <- graphics::par(no.readonly = TRUE, new = FALSE)
    on.exit(graphics::par(oldpar))
    
    # fixed palettes followed by every named R colour that is not white/grey,
    # so that there are enough colours for any number of peaks
    colorsMS1 <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", "#FE6900", 
                   grDevices::colors()[!grepl("white|gr[e|a]y", 
                                              grDevices::colors())])
    colorsMS2 <- c("#7F8E39", "#5F3659", "#E5C616", "#16A08CFF", "#628395",
                   "#C5D86D", "#969696FF", "#358359FF", "#9F4D23FF", 
                   "#D86C4FFF", "#170C2EFF", "#473B75FF", "#F19C1FFF",
                   "#117733", "#DDCC77", "#CC6677", "#88CCEE",
                   "#44AA99", "#332288", "#AA4499", "#999933",
                   "#882255", "#661100", "#6699CC", "#888888", 
                   grDevices::colors()[!grepl("white|gr[e|a]y", 
                                              grDevices::colors())])

    # layout: one row for MS1 plus, in DDA, one row per MS2 scan or, otherwise,
    # a single MS2 row; each row has two panels (raw | smoothed or clean)
    if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
      nplots <- 1 + length(unique(scansMS2))
    } else {
      nplots <- 2
    }
    grDevices::pdf(NULL) # use a pdf NULL device to save plots to an object
    grDevices::dev.control(displaylist = "enable")
    graphics::par(mfrow=c(nplots, 2), mar = c(3,4,4,1), mgp=c(2,1,0), bg = "white")
    # plot MS1 info
    if (length(peaksMS1) > 0){
      # RT window shared by all panels of this lipid
      ssms1 <- msobject$rawData$MS1[msobject$rawData$MS1$peakID %in% peaksMS1,]
      ssms1 <- ssms1[order(ssms1$RT),]
      minrt1 <- min(ssms1$RT)
      maxrt1 <- max(ssms1$RT)
      ints <- c()
      # raw panel: the first peak opens the plot, the remaining ones are added
      # as lines; intensities are scaled to the maximum of each peak (100)
      for (p in 1:length(peaksMS1)){
        toplot <- msobject$rawData$MS1[msobject$rawData$MS1$peakID == peaksMS1[p],]
        toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
        maxint <- max(toplot$int, na.rm=TRUE)
        ints <- append(ints, maxint)
        toplot$int <- toplot$int*100/maxint
        if (p == 1){
          if (nrow(toplot) > 1){
            plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                             as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
            # the EIC is trimmed to the plotted RT window (this trimmed version
            # is also the one used later for the smoothed panel)
            eic[[1]] <- eic[[1]][eic[[1]]$RT <= maxrt1+10 & eic[[1]]$RT >= minrt1-5,]
            graphics::lines(x=eic[[1]]$RT, y=eic[[1]]$int*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5)
          } else {
            # not enough points to draw a line: open an empty panel instead
            plot(0, 0, type = "n", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
          }
          
        } else {
          if (nrow(toplot) > 1){
            graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS1[p], 0.8), lwd = 2.5)
            eic[[p]] <- eic[[p]][eic[[p]]$RT <= maxrt1+10 & eic[[p]]$RT >= minrt1-5,]
            graphics::lines(eic[[p]]$RT, eic[[p]]$int*100/maxint, 
                          col = scales::alpha("grey", 0.4), lwd = 2.5)
          }
        }
      }
      graphics::legend("topright", legend=namesMS1,
             col=colorsMS1[1:length(peaksMS1)], lty=1, lwd = 2, cex=0.6)
      graphics::legend("bottomright", title = "Max. intensity",
             legend=formatC(ints, format = "e", digits = 2),
             col=colorsMS1[1:length(peaksMS1)], lty=1, lwd = 2, cex=0.6)

      # smoothed
      # same layout as the raw panel, but peaks and EICs go through
      # smooth.spline (spar = span); if smoothing fails, the raw values are used
      for (p in 1:length(peaksMS1)){
        toplot <- msobject$rawData$MS1[msobject$rawData$MS1$peakID == peaksMS1[p],]
        toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
        pred <- tryCatch({stats::predict(stats::smooth.spline(toplot$RT, toplot$int, spar = span),
                                  x = toplot$RT)},
                         error = function(e) {return(list(x = toplot$RT,
                                                          y = toplot$int))})
        toplot$RT <- pred$x
        toplot$int <- pred$y
        # here the scaling maximum is taken from the smoothed intensities
        maxint <- max(toplot$int, na.rm=TRUE)
        toplot$int <- toplot$int*100/maxint
        if (p == 1){
          if (nrow(toplot) > 1){
            plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
            smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[1]]$RT, 
                                                                 eic[[1]]$int, 
                                                                 spar = span),
                                            x = eic[[1]]$RT)},
                            error = function(e) {return(list(x = eic[[1]]$RT,
                                                             y = eic[[1]]$int))})
            graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            
          } else {
            plot(0, 0, type = "n", col = scales::alpha(colorsMS1[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste0("MS1: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                              as.character(round(parent$RT, 1)), sep="_"),
                               "\n(", paste(ms1$peakID, collapse="; "), ")"),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
          }
          
        } else {
          if (nrow(toplot) > 1){
            graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS1[p], 0.8), lwd = 2.5,
                            lty = 5)
            smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[p]]$RT, 
                                                                 eic[[p]]$int, 
                                                                 spar = span),
                                            x = eic[[p]]$RT)},
                            error = function(e) {return(list(x = eic[[p]]$RT,
                                                             y = eic[[p]]$int))})
            graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
          }
        }
      }
      # `ints` still holds the raw maxima collected in the first loop
      graphics::legend("topright", legend=namesMS1,
             col=colorsMS1[1:length(peaksMS1)], lty = 5, lwd = 2, cex=0.6)
      graphics::legend("bottomright", title = "Max. intensity",
             legend=formatC(ints, format = "e", digits = 2),
             col=colorsMS1[1:length(peaksMS1)], lty = 5, lwd = 2, cex=0.6)
    }

    # plot MS2 info
    if (length(peaksMS2) > 0){
      # if data is DIA
      if (msobject$metaData$generalMetadata$acquisitionmode == "DIA"){
        ssms2 <- msobject$rawData$MS2[msobject$rawData$MS2$peakID %in% peaksMS2,]
        # NOTE(review): minrt2, maxrt2 and maxint2 are not used afterwards; the
        # MS2 panels use the MS1 RT window (minrt1/maxrt1), presumably so that
        # MS1 and MS2 panels share the same x axis - confirm.
        minrt2 <- min(ssms2$RT)
        maxrt2 <- max(ssms2$RT)
        maxint2 <- max(ssms2$int)
        ints2 <- c()
        for (p in 1:length(peaksMS2)){
          toplot <- msobject$rawData$MS2[msobject$rawData$MS2$peakID == peaksMS2[p],]
          toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
          maxint <- max(toplot$int, na.rm=TRUE)
          ints2 <- append(ints2, maxint)
          toplot$int <- toplot$int*100/maxint
          if (p == 1){
            if (nrow(toplot) > 1){
              plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
              eic2[[1]] <- eic2[[1]][eic2[[1]]$RT <= maxrt1+10 & eic2[[1]]$RT >= minrt1-5,]
              graphics::lines(x=eic2[[1]]$RT, y=eic2[[1]]$int*100/maxint, 
                              col = scales::alpha("grey", 0.4), lwd = 2.5)
            } else {
              plot(0, 0, type = "n", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1)
            }
            
          } else {
            if (nrow(toplot) > 1){
              graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS2[p], 0.8), lwd = 2.5)
              eic2[[p]] <- eic2[[p]][eic2[[p]]$RT <= maxrt1+10 & eic2[[p]]$RT >= minrt1-5,]
              graphics::lines(x=eic2[[p]]$RT, y=eic2[[p]]$int*100/maxint, 
                              col = scales::alpha("grey", 0.4), lwd = 2.5)
            }
          }
        }
        graphics::legend("topright", legend=namesMS2,
               col=colorsMS2[1:length(peaksMS2)], lty=1, lwd = 2, cex=0.6)
        graphics::legend("bottomright", title = "Max. intensity",
               legend=formatC(ints2, format = "e", digits = 2),
               col=colorsMS2[1:length(peaksMS2)], lty=1, lwd = 2, cex=0.6)

        # smoothed
        # unlike the MS1 smoothed panel, the scaling maximum is taken from the
        # raw intensities (maxint is computed before smoothing)
        for (p in 1:length(peaksMS2)){
          toplot <- msobject$rawData$MS2[msobject$rawData$MS2$peakID == peaksMS2[p],]
          toplot <- toplot[order(toplot$RT, decreasing = FALSE),]
          maxint <- max(toplot$int, na.rm=TRUE)
          pred <- tryCatch({stats::predict(stats::smooth.spline(toplot$RT, toplot$int, spar = span),
                                    x = toplot$RT)},
                           error = function(e) {return(list(x = toplot$RT,
                                                            y = toplot$int))})
          toplot$RT <- pred$x
          toplot$int <- pred$y
          toplot$int <- toplot$int*100/maxint
          if (p == 1){
            if (nrow(toplot) > 1){
              plot(toplot$RT, toplot$int, type = "l", col = scales::alpha(colorsMS2[p], 0.8),
                 xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                 lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                 main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                             as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                 las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
              smt <- tryCatch({stats::predict(stats::smooth.spline(eic2[[1]]$RT, 
                                                                 eic2[[1]]$int, 
                                                                 spar = span),
                                            x = eic2[[1]]$RT)},
                            error = function(e) {return(list(x = eic2[[1]]$RT,
                                                             y = eic2[[1]]$int))})
              graphics::lines(x=smt$x, y=smt$y*100/maxint, 
                            col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            } else {
              plot(0, 0, type = "n", col = scales::alpha(colorsMS2[p], 0.8),
                   xlim = c(minrt1-5, maxrt1+10), ylim = c(0, 110),
                   lwd = 2.5, ylab = "Rel. Intensity", xlab = "RT (sec)",
                   main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                               as.character(round(parent$RT, 1)), sep="_"), sep = ""),
                   las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 5)
            }
            
          } else {
            if (nrow(toplot) > 1){
              graphics::lines(toplot$RT, toplot$int, col = scales::alpha(colorsMS2[p], 0.8), lwd = 2.5,
                              lty = 5)
              smt <- tryCatch({stats::predict(stats::smooth.spline(eic2[[p]]$RT, 
                                                                   eic2[[p]]$int, 
                                                                   spar = span),
                                              x = eic2[[p]]$RT)},
                              error = function(e) {return(list(x = eic2[[p]]$RT,
                                                               y = eic2[[p]]$int))})
              graphics::lines(x=smt$x, y=smt$y*100/maxint,  
                              col = scales::alpha("grey", 0.4), lwd = 2.5, lty = 5)
            }
          }
        }
        graphics::legend("topright", legend=namesMS2,
               col=colorsMS2[1:length(peaksMS2)], lty = 5, lwd = 2, cex=0.6)
        graphics::legend("bottomright", title = "Max. intensity",
               legend=formatC(ints2, format = "e", digits = 2),
               col=colorsMS2[1:length(peaksMS2)], lty = 5, lwd = 2, cex=0.6)
        # if data is DDA
      } else if (msobject$metaData$generalMetadata$acquisitionmode == "DDA"){
        # for each scan
        for (s in unique(scansMS2)){
          # subset raw data
          ssrawMS <- rawMS[rawMS$peakID == s,]
          ssmaxrawMS <- max(ssrawMS$int)
          ssrawMS$int <- ssrawMS$int*100/max(ssrawMS$int)
          ssrawMS$int[ssrawMS$int < 2] <- ssrawMS$int[ssrawMS$int < 2] + 2 # to improve visualization
          # annotated fragments of this scan, sorted by mz (names kept in sync)
          mz2 <- peaksMS2[scansMS2 == s]
          namesmz2 <- namesMS2[scansMS2 == s]
          namesmz2 <- namesmz2[order(mz2, decreasing = FALSE)]
          mz2 <- mz2[order(mz2, decreasing = FALSE)]
          
          # assign colors
          # (signals whose mz exactly equals an annotated fragment mz get a
          # palette colour; everything else stays black)
          ssrawMS$color <- "black"
          ssrawMS$color[ssrawMS$mz %in% mz2] <- colorsMS2[1:sum(ssrawMS$mz %in% mz2)]

          # Find precursor in the MS/MS spectrum
          # The scan identifier is split by "_": the 2nd field is read as the
          # collision energy and the 3rd as the position of the scan among the
          # MS2 scans acquired with that collision energy.
          scanprec <- unlist(strsplit(s, "_"))
          collisionenergy <- as.numeric(scanprec[2])
          scanprec <- as.numeric(scanprec[3])
          precursor <- msobject$metaData$scansMetadata$precursor[
            which(msobject$metaData$scansMetadata$msLevel == 2 &
                    msobject$metaData$scansMetadata$collisionEnergy == collisionenergy)[scanprec]]
          # mzMatch is defined elsewhere; the code below reads its output as
          # alternating (index, ppm error) values - presumably its documented
          # format, verify against its definition
          prec <- as.numeric(unlist(sapply(precursor, mzMatch, ssrawMS$mz, ppm = 10)))
          if (length(prec) > 0){
            # keep the match with the smallest ppm error
            minppm <- which.min(prec[seq(2, length(prec), 2)])
            prec <- prec[seq(1, length(prec), 2)][minppm]
            mzprec <- ssrawMS$mz[prec]
            nameprec <- paste(round(mzprec, 3), "_precursor", sep="")
            # the precursor is highlighted only if it is not already coloured
            # as an annotated fragment
            if (ssrawMS$color[prec] == "black"){
              ssrawMS$color[prec] <- colorsMS1[1]
              namesmz2 <- c(namesmz2, nameprec)
            }
          }
          colors2 <- ssrawMS$color[ssrawMS$color != "black"]
          blacks <- ssrawMS$color == "black"
          
          #plot
          # raw spectrum: non-annotated signals first, annotated ones on top
          plot(ssrawMS$mz[blacks], ssrawMS$int[blacks], type = "h", 
               col = scales::alpha(ssrawMS$color[blacks], 0.7),
               xlim = c(0, max(ssrawMS$mz)+20), ylim = c(0, 132),
               lwd = 1, ylab = "Rel. Intensity", xlab = "m/z",
               main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                           as.character(round(parent$RT, 1)), sep="_"),
                            paste("\nPrecursor: ", round(precursor, 3), sep = ""),
                            sep = ""),
               las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 1, yaxt = "n" )
          lines(ssrawMS$mz[!blacks], ssrawMS$int[!blacks], type = "h", 
               col = scales::alpha(ssrawMS$color[!blacks], 1))
          # tick positions are shifted by 2 with respect to their labels,
          # presumably to match the +2 offset applied to small signals above
          graphics::axis(2,at=seq(2, 122, 20), labels = seq(0, 120, 20))
          graphics::legend("topright", legend=namesmz2,
                 col=colors2, lty = 1, lwd = 2, cex=0.6)

          # clean
          # (only the coloured signals: annotated fragments and precursor)
          ssrawMSclean <- ssrawMS[!blacks,]
          plot(ssrawMSclean$mz, ssrawMSclean$int, type = "h",
               col = scales::alpha(ssrawMSclean$color, 1),
               xlim = c(0, max(ssrawMS$mz)+20), ylim = c(0, 132),
               lwd = 1.5, ylab = "Rel. Intensity", xlab = "m/z",
               main = paste("MS2: ", paste(parent$ID, as.character(round(parent$mz, 2)),
                                           as.character(round(parent$RT, 1)), sep="_"),
                            paste("\nPrecursor: ", round(precursor, 3), sep = ""),
                            sep = ""),
               las = 1, cex.axis = 0.7, cex.lab = 1, cex.main = 1, lty = 1, yaxt = "n" )
          graphics::axis(2,at=seq(2, 102, 20), labels = seq(0, 100, 20))
          graphics::legend("topright", legend=namesmz2,
                 col=colors2, lty = 1, lwd = 2, cex=0.6)
        }
      }
    }
    # NOTE(review): plots are stored under the peakID only, while rows are
    # grouped by peakID and Class; two classes annotated on the same peakID
    # would write to the same list element - confirm whether this can happen.
    msobject$annotation$plots[[r]] <- grDevices::recordPlot() # save plot
    invisible(grDevices::dev.off()) # close pdf NULL device
    results <- results[-toremove,]
  }
  return(msobject)
}

# plotticmsbatch
#' TIC for all samples in a msbatch
#'
#' Draws the total ion chromatogram (TIC) of every sample in a msbatch on a
#' single plot.
#'
#' @param msbatch msbatch
#' @param rt numeric vector with the RT range to be plotted. If missing, the
#' range going from the earliest start time to the latest end time of the
#' samples is used.
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata). 
#'
#' @return plot
#'
#' @details If a sample contains both MS1 and MS2 scans, only its MS1 scans are
#' drawn. Batches with a single sample are supported.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
plotticmsbatch <- function(msbatch, rt, colorbygroup = TRUE){
  nsamples <- length(msbatch$msobjects)
  if (nsamples == 0){
    stop("msbatch does not contain any msobject")
  }
  if (missing(rt)){
    maxs <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime))
    mins <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$startTime))
    rt <- c(min(mins), max(maxs))
  }
  # the y axis is scaled to the highest TIC value found in any scan of the batch
  maxint <- max(unlist(lapply(msbatch$msobjects, function(x) x$metaData$scansMetadata$totIonCurrent))) 
  
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  # when there are more samples than fixed colours, the palette is completed
  # with named R colours drawn with a fixed seed (same colours on every call)
  if (nsamples > length(palette)){
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = TRUE)]
    colors <- sample(colors, size = (nsamples - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    # a factor used as index selects by its integer codes, so every group gets
    # the palette colour at the position of its level
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- seq_len(nsamples)
    legendnames <- msbatch$metaData$sample
  }
  
  # RT and TIC values of one sample (if there is MS1 and MS2, use just MS1)
  ticxy <- function(msobject){
    scans <- msobject$metaData$scansMetadata
    x <- scans$RT
    y <- scans$totIonCurrent
    if (all(c(1, 2) %in% unique(scans$msLevel))){
      ms1scans <- which(scans$msLevel == 1)
      x <- x[ms1scans]
      y <- y[ms1scans]
    }
    list(x = x, y = y)
  }
  
  # plot tics: the first sample opens the plot and the rest are added as lines.
  # seq_len() keeps the loop valid for batches with a single sample.
  for (i in seq_len(nsamples)){
    tic <- ticxy(msbatch$msobjects[[i]])
    samplecol <- scales::alpha(palette[samplescolor[i]], 0.7)
    if (i == 1){
      plot(x = tic$x, y = tic$y,
           xlim = rt, ylim = c(0, maxint+maxint*0.1), 
           type = "l", col = samplecol,
           main = "Total Ion Chromatograms", xlab = "RT (sec)", ylab = "Intensity")
    } else {
      graphics::lines(x = tic$x, y = tic$y, type = "l", col = samplecol)
    }
  }
  graphics::legend("topright", legend = legendnames, 
                   col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
}

# ploteicmsbatch
#' EIC for all samples in a msbatch
#'
#' Draws the extracted ion chromatogram (EIC) of a given mz for every sample in
#' a msbatch on a single plot.
#'
#' @param msbatch msbatch
#' @param mz mz of interest
#' @param ppm mass tolerance in ppm
#' @param rt numeric vector with the RT range to be plotted. If missing, the
#' range goes from 0 to the latest end time of the samples.
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata).
#' @param verbose print information messages.
#'
#' @return plot
#'
#' @details EICs are smoothed with a smoothing spline before plotting (raw
#' values are used if smoothing fails) and negative smoothed values are set to
#' 0. Samples without signal for the mz of interest are skipped. If no sample
#' has signal, an empty plot is drawn.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
ploteicmsbatch <- function(msbatch, 
                           mz, 
                           ppm, 
                           rt, 
                           colorbygroup = TRUE, 
                           verbose = TRUE){
  nsamples <- length(msbatch$msobjects)
  if (nsamples == 0){
    stop("msbatch does not contain any msobject")
  }
  if (missing(rt)){
    maxs <- unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime))
    rt <- c(0, max(maxs))
  }
  # extract eics: MS1 points within the ppm tolerance and the RT range, sorted
  # by RT (an empty data frame is kept for samples without signal)
  eic <- lapply(msbatch$msobjects, function(x){
    ms1 <- x$rawData$MS1
    e <- ms1[abs(ms1$mz - mz)*1e6/mz < ppm,, drop = FALSE]
    e <- e[e$RT >= rt[1] & e$RT <= rt[2],, drop = FALSE]
    if (nrow(e) > 0){
      e[order(e$RT), c("RT", "int")]
    } else {
      data.frame()
    }
  })
  maxint <- max(unlist(lapply(eic, function(x) if(nrow(x) > 0){max(x$int, na.rm = TRUE)} else {0})), na.rm = TRUE) 
  
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  # when there are more samples than fixed colours, the palette is completed
  # with named R colours drawn with a fixed seed (same colours on every call)
  if (nsamples > length(palette)){
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = TRUE)]
    colors <- sample(colors, size = (nsamples - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- seq_len(nsamples)
    legendnames <- msbatch$metaData$sample
  }
  
  haspeaks <- vapply(eic, function(x) nrow(x) > 0, logical(1))
  
  # no sample has signal for this mz: draw an empty plot with its legend
  if (!any(haspeaks)){
    if(verbose){cat("No peaks found")}
    plot(0, xlim = rt, ylim = c(0, 100), type = "l", lwd = 2,
         col = scales::alpha(palette[samplescolor[1]], 0.7), 
         main = paste("EIC: ", mz, sep=""), 
         xlab = "RT (sec)", ylab = "Intensity")
    graphics::legend("topright", legend = legendnames, 
                     col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
    return(invisible(NULL))
  }
  
  # plot eics: the first sample with signal opens the plot and the following
  # ones are added as lines
  first <- TRUE
  for (i in which(haspeaks)){
    # smoothing is best-effort: if smooth.spline fails (e.g. too few points),
    # the raw EIC is drawn instead
    smt <- tryCatch({stats::predict(stats::smooth.spline(eic[[i]]$RT,
                                                         eic[[i]]$int,
                                                         spar = 0.05),
                                    x = eic[[i]]$RT)},
                    error = function(e) {return(list(x = eic[[i]]$RT,
                                                     y = eic[[i]]$int))})
    smt$y[smt$y < 0] <- 0
    samplecol <- scales::alpha(palette[samplescolor[i]], 0.7)
    if (first){
      plot(smt$x, smt$y, xlim = rt, 
           ylim = c(0, maxint+maxint*0.1), type = "l", lwd = 2,
           col = samplecol, 
           main = paste("EIC: ", mz, sep=""), 
           xlab = "RT (sec)", ylab = "Intensity")
      first <- FALSE
    } else {
      # lines() only takes graphical parameters; axis limits, title and axis
      # labels belong to the plot() call above
      graphics::lines(smt$x, smt$y, type = "l", lwd = 2, col = samplecol)
    }
  }
  # the legend is drawn in every case, also when the only sample with signal
  # is the last (or the only) sample of the batch
  graphics::legend("topright", legend = legendnames, 
                   col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
}

# rtdevplot
#' Plot retention time deviation
#'
#' Plot retention time deviation of an aligned msbatch
#'
#' @param msbatch aligned msbatch.
#' @param colorbygroup logical. If TRUE, samples will be coloured based on their
#' sample group (from metadata).
#'
#' @return plot
#'
#' @details One line is drawn per sample using the RT and RTdev values stored
#' in \code{msbatch$alignment$rtdevcorrected}. Zeros at the start and at the end
#' of each RT deviation vector are not drawn. A dashed grey line marks the zero
#' deviation level.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
rtdevplot <- function(msbatch, colorbygroup = TRUE){
  nsamples <- length(msbatch$msobjects)
  
  # set color palette
  palette <- c("#42858C", "#FE9300", "#870E75", "#3E71A8", 
               "#7F8E39", "#5F3659", "#E5C616",
               "#16A08CFF", "#FE6900", "#628395", "#C5D86D", "#969696FF",
               "#358359FF", "#9F4D23FF", "#D86C4FFF", "#170C2EFF",
               "#473B75FF", "#F19C1FFF",
               "#117733", "#DDCC77", "#CC6677", "#88CCEE",
               "#44AA99", "#332288", "#AA4499", "#999933",
               "#882255", "#661100", "#6699CC", "#888888")
  if (nsamples > length(palette)){
    # more samples than predefined colours: complete the palette with named R
    # colours (greys, whites and light tones excluded). The seed is fixed so
    # the same colours are drawn on every call; note that this resets the
    # state of the global random number generator.
    set.seed(19580811)
    colors <- grDevices::colors()[grep('gr(a|e)y|white|light', grDevices::colors(), invert = TRUE)]
    colors <- sample(colors, size = (nsamples - length(palette)))
    palette <- c(palette, colors)
  }
  if (colorbygroup){
    # factor codes are used as indices into the palette: one colour per group
    samplescolor <- as.factor(msbatch$metaData$sampletype)
    legendnames <- levels(as.factor(msbatch$metaData$sampletype))
  } else {
    samplescolor <- seq_along(msbatch$msobjects)
    legendnames <- msbatch$metaData$sample
  }
  
  # axis limits shared by all samples
  alldevs <- unlist(lapply(msbatch$alignment$rtdevcorrected, function(x) x$RTdev))
  maxdev <- max(alldevs)
  mindev <- min(alldevs)
  xmax <- max(unlist(lapply(msbatch$msobjects, function(x) x$metaData$generalMetadata$endTime)))
  
  # remove zeros at the start and the end of the rtdev vector. Everything
  # between the first and the last non-zero deviation is kept, so interior
  # points are never dropped (a cumulative-sum mask would drop them whenever
  # a partial sum cancels to exactly 0).
  trimzeros <- function(rtdev){
    nonzero <- which(rtdev$RTdev != 0)
    if (length(nonzero) > 0){
      keep <- seq(nonzero[1], nonzero[length(nonzero)])
    } else {
      keep <- integer(0)
    }
    list(x = rtdev$RT[keep], y = rtdev$RTdev[keep])
  }
  
  # first sample opens the plot
  first <- trimzeros(msbatch$alignment$rtdevcorrected[[1]])
  plot(x = first$x, y = first$y, 
       xlim = c(0, xmax+xmax/10), ylim = c(mindev-1, maxdev+1), type = "l", 
       col = scales::alpha(palette[samplescolor[1]], 0.7), lwd = 2, 
       main = "Retention time deviation", xlab = "RT (sec)", ylab = "RT dev (sec)")
  lines(x = first$x, y = rep(0, length(first$x)), col = "darkgrey", lwd = 1, lty = 2)
  
  # remaining samples; seq_along()[-1] is empty for single-sample batches,
  # whereas 2:length() would iterate over c(2, 1) and fail
  for (s in seq_along(msbatch$msobjects)[-1]){
    current <- trimzeros(msbatch$alignment$rtdevcorrected[[s]])
    lines(x = current$x, y = current$y, 
          col = scales::alpha(palette[samplescolor[s]], 0.7))
  }
  legend("topright", legend = legendnames, 
         col = scales::alpha(palette, 0.7), lty = 1, lwd = 2, cex = 0.5)
}

# createLipidDB
#' Customizable lipid DBs creator
#'
#' It allows to create easy-customizable lipid DBs for annotation with LipidMS
#' package.
#'
#' @param lipid character value indicating the class of lipid. See Details.
#' @param chains character vector indicating the FA chains to be employed
#' @param chains2 character vector containing the sphingoid bases to be employed
#' if required. It is only used for "Cer", "CerP", "GlcCer", "SM", "AcylCer",
#' "Sph", "SphP" and "all".
#'
#' @return List with the requested dbs (data frames). For a single lipid class,
#' the list contains one data frame with columns formula, total and Mass. For
#' "all", it contains one data frame per lipid class (with an additional ID
#' column), plus nlsphdb, badb, baconjdb and adductsTable. If \code{lipid} is
#' not a supported value, an empty list is returned with a warning.
#'
#' @details \code{lipid} argument needs to be one of the following
#' character values: "Cer", "CerP", "GlcCer", "AcylCer", "SM", "Carnitine",
#' "CE", "FA", "HFA", "Sph" (sphingoid bases), "SphP", "MG", "LPA", "LPAo",
#' "LPC", "LPCo", "LPCp", "LPE", "LPEo", "LPEp", "LPG", "LPI", "LPS", "FAHFA",
#' "DG", "PC", "PCo", "PCp", "PE", "PEo", "PEp", "PG", "PI", "PIP", "PIP2",
#' "PIP3", "PS", "PA", "TG", "CL" or "all".
#'
#' @examples
#' fas <- c("8:0", "10:0", "12:0", "14:0", "14:1", "15:0", "16:0", "16:1",
#' "17:0", "18:0", "18:1", "18:2", "18:3", "18:4", "20:0", "20:1", "20:2",
#' "20:3", "20:4", "20:5", "22:0", "22:1", "22:2", "22:3", "22:4", "22:5",
#' "22:6", "24:0", "24:1", "26:0")
#' sph <- c("16:0", "16:1", "18:0", "18:1")
#' newdb <- createLipidDB(lipid = "PC", chains = fas, chains2 = sph)
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
createLipidDB <- function(lipid, chains, chains2){
  # lipid class -> name of its element in the output list. The order of this
  # vector is the order of the elements returned when lipid = "all".
  dbnames <- c(
    Cer = "cerdb", CerP = "cerPdb", AcylCer = "acylcerdb",
    GlcCer = "glccerdb", SM = "smdb",
    FA = "fadb", HFA = "hfadb", Carnitine = "carnitinedb",
    LPA = "lysopadb", LPE = "lysopedb", LPG = "lysopgdb",
    LPI = "lysopidb", LPS = "lysopsdb", LPC = "lysopcdb",
    MG = "mgdb", CE = "CEdb", Sph = "sphdb", SphP = "sphPdb",
    FAHFA = "fahfadb", PE = "pedb", PG = "pgdb", PI = "pidb",
    PIP = "pipdb", PIP2 = "pip2db", PIP3 = "pip3db",
    PS = "psdb", PA = "padb", PC = "pcdb", DG = "dgdb",
    TG = "tgdb", CL = "cldb",
    PEo = "peodb", LPEo = "lysopeodb", LPAo = "lysopaodb",
    PCp = "pcpdb", LPCp = "lysopcpdb", PCo = "pcodb",
    LPCo = "lysopcodb", PEp = "pepdb", LPEp = "lysopepdb"
  )
  
  # classes grouped by the function that generates them
  sphingolipids <- c("Cer", "CerP", "GlcCer", "SM", "AcylCer")
  sphingoidbases <- c("Sph", "SphP")
  onechain <- c("FA", "HFA", "Carnitine", "LPA", "LPE", "LPG", "LPI", "LPS",
                "LPC", "MG", "CE", "LPEo", "LPAo", "LPCp", "LPCo", "LPEp")
  twochains <- c("DG", "PC", "PE", "PG", "PI", "PS", "PIP", "PIP2", "PIP3",
                 "FAHFA", "PA", "PEo", "PCp", "PCo", "PEp")
  
  # Build the db of one lipid class (columns: formula, total, Mass and ID).
  # chains2 is only evaluated for the classes that use it, so it can be
  # omitted by the caller for any other class.
  buildDB <- function(lipidclass){
    if (lipidclass %in% sphingolipids){
      db <- dbSphingolipids(chains = chains, chains2 = chains2,
                            lipid = lipidclass)
    } else if (lipidclass %in% sphingoidbases){
      # sphingoid bases are generated from chains2 instead of chains
      db <- dbOneChain(chains = chains2, lipid = lipidclass)
    } else if (lipidclass %in% onechain){
      db <- dbOneChain(chains = chains, lipid = lipidclass)
    } else if (lipidclass %in% twochains){
      db <- dbTwoChains(chains = chains, lipid = lipidclass)
    } else if (lipidclass == "TG"){
      db <- dbThreeChains(chains = chains, lipid = lipidclass)
    } else {
      # only "CL" is left among the classes listed in dbnames
      db <- dbFourChains(chains = chains, lipid = lipidclass)
    }
    data.frame(formula = db$formula, total = db$total,
               Mass = as.numeric(db$Mass),
               ID = paste(lipidclass, "(", db$total, ")", sep = ""),
               stringsAsFactors = FALSE)
  }
  
  customizedDataSets <- list()
  if (lipid %in% names(dbnames)){
    # single class: the ID column is not included in the output
    db <- buildDB(lipid)
    customizedDataSets[[dbnames[[as.character(lipid)]]]] <-
      data.frame(formula = db$formula, total = db$total,
                 Mass = as.numeric(db$Mass), stringsAsFactors = FALSE)
  } else if (lipid == "all"){
    # every class, keeping the ID column
    for (lipidclass in names(dbnames)){
      customizedDataSets[[dbnames[[lipidclass]]]] <- buildDB(lipidclass)
    }
    # nlsphdb: sphingoid bases db without ID and with Mass shifted by -44.05
    # NOTE(review): the meaning of the 44.05 offset is not stated in this
    # function - confirm against the fragmentation rules that use nlsphdb.
    nlsph <- customizedDataSets[["sphdb"]][, 1:3]
    nlsph$Mass <- nlsph$Mass - 44.05
    customizedDataSets[["nlsphdb"]] <- nlsph
    # dbs that do not depend on chains are taken from the package data
    customizedDataSets[["badb"]] <- LipidMS::badb
    customizedDataSets[["baconjdb"]] <- LipidMS::baconjdb
    customizedDataSets[["adductsTable"]] <- LipidMS::adductsTable
  } else {
    warning("Unknown lipid class: ", lipid, ". An empty list is returned.",
            call. = FALSE)
  }
  return(customizedDataSets)
}

# assignDB
#' Load LipidMS default data bases
#'
#' Collect the default data bases shipped with LipidMS, which are required to
#' run identification functions, into a single named list.
#'
#' @return list of data frames
#'
#' @examples
#' \dontrun{
#' dbs <- assignDB()
#' }
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
assignDB <- function(){
  # Every element is named after the package dataset it holds, except
  # carnitinedb, which is filled from LipidMS::carnitinesdb.
  list(
    cerdb = LipidMS::cerdb,
    cerPdb = LipidMS::cerPdb,
    acylcerdb = LipidMS::acylcerdb,
    smdb = LipidMS::smdb,
    fadb = LipidMS::fadb,
    hfadb = LipidMS::hfadb,
    carnitinedb = LipidMS::carnitinesdb,
    lysopadb = LipidMS::lysopadb,
    lysopaodb = LipidMS::lysopaodb,
    lysopedb = LipidMS::lysopedb,
    lysopeodb = LipidMS::lysopeodb,
    lysopepdb = LipidMS::lysopepdb,
    lysopgdb = LipidMS::lysopgdb,
    lysopidb = LipidMS::lysopidb,
    lysopsdb = LipidMS::lysopsdb,
    lysopcdb = LipidMS::lysopcdb,
    lysopcodb = LipidMS::lysopcodb,
    lysopcpdb = LipidMS::lysopcpdb,
    mgdb = LipidMS::mgdb,
    CEdb = LipidMS::CEdb,
    sphdb = LipidMS::sphdb,
    sphPdb = LipidMS::sphPdb,
    fahfadb = LipidMS::fahfadb,
    pcdb = LipidMS::pcdb,
    pcodb = LipidMS::pcodb,
    pcpdb = LipidMS::pcpdb,
    pedb = LipidMS::pedb,
    peodb = LipidMS::peodb,
    pepdb = LipidMS::pepdb,
    pgdb = LipidMS::pgdb,
    pidb = LipidMS::pidb,
    psdb = LipidMS::psdb,
    padb = LipidMS::padb,
    dgdb = LipidMS::dgdb,
    tgdb = LipidMS::tgdb,
    cldb = LipidMS::cldb,
    badb = LipidMS::badb,
    baconjdb = LipidMS::baconjdb,
    nlsphdb = LipidMS::nlsphdb,
    adductsTable = LipidMS::adductsTable
  )
}

# getInclusionList
#' Obtain an inclusion list from the annotation results
#'
#' Obtain an inclusion list for the identified lipids.
#'
#' @param df data frame. Output of identification functions (results table from
#' an msobject or feature table from an msbatch). It must contain either a
#' LipidMSid or an ID column.
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return Data frame with 7 columns: Formula, RT, RTminutes, Mn (neutral
#' mass), mz, Adduct and LipidMSid. There is one row per lipid and adduct and
#' duplicated rows are removed. If no lipid is annotated, a data frame with
#' these columns and no rows is returned.
#'
#' @details When df has a LipidMSid column, only the first identity of each
#' entry (text before the first "|" or ";") is used and entries without
#' identity are discarded. Otherwise the ID, Class and CDB columns of df are
#' used and only the first adduct of each entry is kept.
#'
#' RTminutes is taken from df when available; otherwise it is computed as
#' RT/60 rounded to 2 decimals.
#'
#' Adducts that are not found in the adducts table get an NA mz.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
getInclusionList <- function(df, dbs){

  if (missing(dbs)){
    dbs <- assignDB()
  }
  adductsTable <- dbs$adductsTable

  if (!any(c("LipidMSid", "ID") %in% colnames(df))){
    stop("df must be annotated (i.e. results table from an msobject or feature table from an msbatch)")
  }

  # Returned when there is nothing to report, so callers always get the same
  # set of columns
  emptyList <- data.frame(Formula = character(0), RT = numeric(0),
                          RTminutes = numeric(0), Mn = numeric(0),
                          mz = numeric(0), Adduct = character(0),
                          LipidMSid = character(0), stringsAsFactors = FALSE)

  results <- data.frame(mz = df$mz, RT = df$RT, Adduct = df$Adduct)

  # RTminutes is kept apart from results so that the rows handed to getFormula
  # are not altered. Same RT/60 conversion that searchIsotopes applies.
  if ("RTminutes" %in% colnames(df)){
    RTminutes <- df$RTminutes
  } else {
    RTminutes <- round(df$RT/60, 2)
  }

  if ("LipidMSid" %in% colnames(df)){
    # keep only the first identity of each entry
    results$LipidMSid <- vapply(as.character(df$LipidMSid), function(x){
      id <- unlist(strsplit(x, "[\\|;]"))[1]
      if (is.na(id)) "" else id
    }, character(1), USE.NAMES = FALSE)
    keep <- results$LipidMSid != ""
    results <- results[keep,]
    RTminutes <- RTminutes[keep]
    if (nrow(results) == 0){
      return(emptyList)
    }
    # class and total chain composition of each identity
    comp <- do.call(rbind, lapply(results$LipidMSid, function(x){
      y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
      names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
      if (is.na(y["chains"])){
        # no summed composition available: build it from the individual chains
        ch <- y[3:6]
        n <- sum(!is.na(ch))
        ch <- paste(ch[1:n], sep="", collapse=" ")
        y["chains"] <- sumChains(ch, n)
      }
      return(y)
    }))
    results$Class <- comp[,"class"]
    results$CDB <- comp[,"chains"]
  } else {
    # keep only the first adduct of each entry
    results$Adduct <- vapply(as.character(df$Adduct), function(x){
      adduct <- unlist(strsplit(x, "\\;"))[1]
      if (is.na(adduct)) "" else adduct
    }, character(1), USE.NAMES = FALSE)
    results$LipidMSid <- df$ID
    results$Class <- df$Class
    results$CDB <- df$CDB
  }
  if (nrow(results) == 0){
    return(emptyList)
  }

  # Formula and neutral mass of each lipid
  Form_Mn <- apply(results, 1, getFormula, dbs = dbs)
  if (is.matrix(Form_Mn)){
    # apply simplified the output to a matrix: back to one element per lipid
    Form_Mn <- lapply(seq_len(ncol(Form_Mn)), function(i){
      c(Form_Mn[1,i], Form_Mn[2,i])
    })
  }
  # Lipids without formula. Single-bracket assignment is required here:
  # `[[<-` fails as soon as more than one element has to be replaced.
  Form_Mn[lengths(Form_Mn) == 0] <- list(c(NA, NA))
  Formula <- unlist(lapply(Form_Mn, "[[", 1))
  Mn <- as.numeric(unlist(lapply(Form_Mn, "[[", 2)))
  RT <- results$RT
  Name <- results$LipidMSid

  # One output row per lipid and adduct
  adducts <- strsplit(as.character(results$Adduct), ";", fixed = TRUE)
  adducts[lengths(adducts) == 0] <- list(NA_character_)
  lipid <- rep(seq_len(nrow(results)), lengths(adducts))
  adduct <- unlist(adducts)
  # match() keeps mz aligned with its adduct: unknown adducts yield NA instead
  # of shifting the remaining values
  ad <- match(adduct, adductsTable$adduct)
  mz <- (adductsTable$n[ad]*Mn[lipid] + adductsTable$mdiff[ad])/
    abs(adductsTable$charge[ad])

  inclusionList <- data.frame(Formula = Formula[lipid],
                              RT = RT[lipid],
                              RTminutes = RTminutes[lipid],
                              Mn = Mn[lipid],
                              mz = mz,
                              Adduct = adduct,
                              LipidMSid = Name[lipid],
                              stringsAsFactors = FALSE)
  inclusionList <- unique(inclusionList)
  return(inclusionList)
}

# searchIsotopes
#' Targeted isotopes search
#'
#' This function uses annotation results of deisotoped data to search
#' for isotopes in raw data.
#'
#' @param msobject msobject.
#' @param label isotope employed for the experiment. It can be "13C" or "D".
#' @param adductsTable adducts table employed for lipids annotation.
#' @param ppm mass error tolerance.
#' @param coelCutoff coelution score threshold between isotopes. By default, 0.7.
#' @param results target list to search isotopes. If missing, all results from the
#' msobject are searched. It is used by \link{searchIsotopesmsbatch}.
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return Data frame with one row per compound and isotopologue ([M+0],
#' [M+1], ... up to the number of C or H atoms in the formula) and the columns
#' LipidMSid, Isotope, Adduct, Formula, mz, RT, RTminutes and int. If there are
#' no target compounds, a data frame with these columns and no rows is returned.
#'
#' @details For each isotopologue, raw MS1 data points within the ppm window
#' and the RT limits of the peak are extracted and their baseline-subtracted
#' intensities are summed. Apart from the first isotopologue found, an
#' isotopologue is only accepted if it shares more than 2 scans with the last
#' accepted one and the correlation between their intensities reaches
#' coelCutoff. Isotopologues that are not found or not accepted are reported
#' with intensity 0, their theoretical mz and the RT of the target compound.
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
searchIsotopes <- function(msobject,
                           label,
                           adductsTable = LipidMS::adductsTable,
                           ppm = 10,
                           coelCutoff = 0.7,
                           results, 
                           dbs){

  # Fail early: any other value would leave the mass difference undefined
  if (!is.character(label) || length(label) != 1 ||
      !label %in% c("13C", "D")){
    stop("label must be either \"13C\" or \"D\"")
  }
  if (label == "13C"){
    massdiff <- 1.0033548
    element <- "C"
  } else {
    massdiff <- 1.006277
    element <- "H"
  }

  if (missing(dbs)){
    dbs <- assignDB()
  }

  # Returned when there is nothing to search
  emptyIsotopes <- data.frame(LipidMSid = character(0),
                              Isotope = character(0),
                              Adduct = character(0),
                              Formula = character(0),
                              mz = numeric(0),
                              RT = numeric(0),
                              RTminutes = numeric(0),
                              int = numeric(0),
                              stringsAsFactors = FALSE)

  ############################################################################
  # Target and raw data
  if (missing(results)){
    # List of target compounds
    peaklist <- msobject$annotation$annotatedPeaklist
    # first entry of a "|" or ";" separated field ("" if there is none)
    firstField <- function(x){
      field <- unlist(strsplit(x, "[\\|;]"))[1]
      if (is.na(field)) "" else field
    }
    results <- data.frame(mz = peaklist$mz,
                          RT = peaklist$RT,
                          iniRT = peaklist$minRT,
                          endRT = peaklist$maxRT)
    results$LipidMSid <- vapply(as.character(peaklist$LipidMSid), firstField,
                                character(1), USE.NAMES = FALSE)
    results$Adduct <- vapply(as.character(peaklist$Adduct), firstField,
                             character(1), USE.NAMES = FALSE)
    # The same mask is reused below for the intensity columns, so both parts
    # always have the same rows
    keep <- results$LipidMSid != ""
    results <- results[keep,]
    if (nrow(results) == 0){
      return(emptyIsotopes)
    }
    comp <- do.call(rbind, lapply(results$LipidMSid, function(x){
      y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
      names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
      if (is.na(y["chains"])){
        # no summed composition available: build it from the individual chains
        ch <- y[3:6]
        n <- sum(!is.na(ch))
        ch <- paste(ch[1:n], sep="", collapse=" ")
        y["chains"] <- sumChains(ch, n)
      }
      return(y)
    }))
    results$Class <- comp[,"class"]
    results$CDB <- comp[,"chains"]
    # drop = FALSE keeps the column name when a single file matches
    results <- cbind(results,
                     peaklist[keep,
                              colnames(peaklist) %in%
                                make.names(msobject$metaData$generalMetadata$file),
                              drop = FALSE])
  }
  if (nrow(results) == 0){
    return(emptyIsotopes)
  }
  MS1 <- msobject$rawData$MS1

  ############################################################################
  # Make an index for raw MS data to speed up the function
  drt <- max(MS1$RT) - min(MS1$RT)
  mz <- MS1$mz
  ord <- order(mz)
  mz <- mz[ord]
  rt <- MS1$RT[ord]
  int <- MS1$int[ord]
  scan <- MS1$Scan[ord]
  part <- .Call("agglom", mz, rt, as.integer(1),
                ppm*2, drt,
                PACKAGE = "LipidMS")
  part <- part[order(part, decreasing = FALSE)]
  maxit <- max(part)
  index <- .Call("indexed", as.integer(part), int, 0, max(int),
                 as.integer(maxit), PACKAGE = "LipidMS")
  # columns 1 and 2 are used below as first and last position of each
  # partition; column 4 is the mz at the first position
  index <- cbind(index, mz[index[,1]])
  inimz <- index[,4]

  ############################################################################
  # Extract formula and mass of the target compounds
  results$Adduct <- vapply(as.character(results$Adduct), function(x){
    unlist(strsplit(x, "\\;"))[1]
  }, character(1), USE.NAMES = FALSE)
  Form_Mn <- apply(results, 1, getFormula, dbs = dbs)
  formula <- unlist(lapply(Form_Mn, "[[", "Formula"))
  results$Formula <- formula
  results$Mn <- unlist(lapply(Form_Mn, "[[", "Mn"))
  elements <- c("C", "H", "N", "O", "P")
  # one row per compound; elements absent from a formula are NA
  comp <- do.call(rbind, lapply(results$Formula, function(x){
    CHNOSZ::makeup(x)[elements]
  }))
  colnames(comp) <- elements
  results$C <- comp[,"C"]
  results$H <- comp[,"H"]

  ############################################################################
  # Search isotopes
  isotopes <- apply(results, 1, function(x){
    ##########################################################################
    # get rt limits
    minRT <- as.numeric(x[["iniRT"]])
    maxRT <- as.numeric(x[["endRT"]])
    targetRT <- as.numeric(x[["RT"]])
    ad <- match(x[["Adduct"]], adductsTable$adduct)
    if (is.na(ad)){
      stop("Adduct '", x[["Adduct"]], "' of ", x[["LipidMSid"]],
           " not found in adductsTable")
    }
    # (n*M + mdiff)/|charge|: same expression used by getInclusionList, so
    # multimer adducts (n > 1) get the adduct mass added only once
    exactmz <- (adductsTable$n[ad]*as.numeric(x[["Mn"]]) +
                  adductsTable$mdiff[ad])/abs(adductsTable$charge[ad])
    tol <- exactmz*ppm/1e6
    top <- as.numeric(x[[element]])
    # Defaults for isotopologues that are not found or not accepted
    # NOTE(review): the spacing between isotopologues is not divided by the
    # adduct charge - confirm for multiply charged adducts.
    intensities <- numeric(top + 1)
    mzs <- exactmz + (0:top)*massdiff
    rts <- rep(targetRT, top + 1)
    subsetPrev <- data.frame()
    for (i in 0:top){
      ########################################################################
      # get mz limits
      minmz <- exactmz - tol + i*massdiff
      maxmz <- exactmz + tol + i*massdiff
      # Reset for every isotopologue: otherwise, when no partition matches,
      # the points of the previous isotopologue (or base::subset) are used
      found <- integer(0)
      m <- which(inimz < maxmz)
      if (length(m) > 0){
        m <- m[length(m)]
        start <- index[m, 1]
        end <- index[m, 2]
        span <- start:end
        found <- span[mz[span] >= minmz & mz[span] <= maxmz &
                        rt[span] >= minRT & rt[span] <= maxRT]
      }
      if (length(found) == 0){
        next
      }
      subsetMS1 <- MS1[ord[found],]
      if (nrow(subsetMS1) == 0){
        next
      }
      subsetMS1 <- subsetMS1[order(subsetMS1$RT),]
      subsetMS1$int <- subsetMS1$int - min(subsetMS1$int) # baseline subtraction
      if (nrow(subsetPrev) > 0){
        # coelution with the last accepted isotopologue
        merged <- merge(subsetPrev, subsetMS1, by = "Scan")
        if (nrow(merged) <= 2){
          next
        }
        coelScore <- cor(merged$int.x, merged$int.y)
        if (is.na(coelScore) || coelScore < coelCutoff){
          next
        }
      }
      subsetPrev <- subsetMS1
      intensities[i + 1] <- sum(subsetMS1$int)
      mzs[i + 1] <- mean(subsetMS1$mz)
      rts[i + 1] <- subsetMS1$RT[which.max(subsetMS1$int)]
    }
    return(data.frame(LipidMSid = x[["LipidMSid"]],
                      Isotope = paste("[M+", 0:top, "]", sep = ""),
                      Adduct = x[["Adduct"]],
                      Formula = x[["Formula"]],
                      mz = mzs, 
                      RT = rts,  
                      RTminutes = round(rts/60, 2),
                      int = intensities,
                      stringsAsFactors = FALSE,
                      row.names = seq_along(intensities)))
  })
  isotopes <- data.frame(do.call(rbind, isotopes))
  rownames(isotopes) <- seq_len(nrow(isotopes))
  return(isotopes)
}

# searchIsotopesmsbatch
#' Targeted isotopes search for msbatch
#'
#' This function uses annotation results of deisotoped data to search
#' for isotopes in raw data.
#'
#' @param msbatch annotated msbatch.
#' @param label isotope employed for the experiment. It can be "13C" or "D".
#' @param adductsTable adducts table employed for lipids annotation.
#' @param ppm mass error tolerance.
#' @param coelCutoff coelution score threshold between isotopes. By default, 0.7.
#' @param dbs list of data bases required for annotation. By default, dbs
#' contains the required data frames based on the default fragmentation rules.
#' If these rules are modified, dbs may need to be supplied. See \link{createLipidDB}
#' and \link{assignDB}.
#'
#' @return Data frame with one row per compound and isotopologue. Columns are
#' LipidMSid, Isotope, Adduct, Formula, mz, RT and RTminutes (as found in the
#' first sample) followed by one intensity column per sample.
#' 
#' @examples
#' \dontrun{
#' msbatch <- batchProcessing(metadata = "metadata.csv", polarity = "positive")
#' msbatch <- alignmsbatch(msbatch)
#' msbatch <- groupmsbatch(msbatch)
#' msbatch <- annotatemsbatch(msbatch)
#' searchIsotopesmsbatch(msbatch, label = "13C")
#' }
#'
#' @author M Isabel Alcoriza-Balaguer <maribel_alcoriza@iislafe.es>
searchIsotopesmsbatch <- function(msbatch,
                           label,
                           adductsTable = LipidMS::adductsTable,
                           ppm = 10,
                           coelCutoff = 0.7,
                           dbs){

  if (length(msbatch$msobjects) == 0){
    stop("msbatch does not contain any msobject")
  }
  if (missing(dbs)){
    dbs <- assignDB()
  }
  features <- msbatch$features
  samples <- as.character(msbatch$metaData$sample)
  infoCols <- c("LipidMSid", "Isotope", "Adduct", "Formula", "mz", 
                "RT", "RTminutes")

  # first entry of a "|" or ";" separated field ("" if there is none)
  firstField <- function(x){
    field <- unlist(strsplit(x, "[\\|;]"))[1]
    if (is.na(field)) "" else field
  }

  # List of target compounds
  results <- data.frame(mz = features$mz, 
                        RT = features$RT,
                        iniRT = features$iniRT,
                        endRT = features$endRT,
                        RTminutes = features$RTminutes)
  results$LipidMSid <- vapply(as.character(features$LipidMSid), firstField,
                              character(1), USE.NAMES = FALSE)
  results$Adduct <- vapply(as.character(features$Adduct), firstField,
                           character(1), USE.NAMES = FALSE)
  # The same mask is reused below for the intensity columns, so both parts
  # always have the same rows
  keep <- results$LipidMSid != ""
  results <- results[keep,]
  if (nrow(results) == 0){
    # nothing annotated: same columns, no rows
    isotopes <- data.frame(LipidMSid = character(0), Isotope = character(0),
                           Adduct = character(0), Formula = character(0),
                           mz = numeric(0), RT = numeric(0),
                           RTminutes = numeric(0), stringsAsFactors = FALSE)
    for (s in samples){
      isotopes[[s]] <- numeric(0)
    }
    return(isotopes)
  }
  comp <- do.call(rbind, lapply(results$LipidMSid, function(x){
    y <- chains(x)[c("class", "chains", "chains1", "chains2", "chains3", "chains4")]
    names(y) <- c("class", "chains", "chains1", "chains2", "chains3", "chains4")
    if (is.na(y["chains"])){
      # no summed composition available: build it from the individual chains
      ch <- y[3:6]
      n <- sum(!is.na(ch))
      ch <- paste(ch[1:n], sep="", collapse=" ")
      y["chains"] <- sumChains(ch, n)
    }
    return(y)
  }))
  results$Class <- comp[,"class"]
  results$CDB <- comp[,"chains"]
  # drop = FALSE keeps the sample name as column name when the batch has a
  # single sample; it is needed to find the column by name in the loop below
  results <- cbind(results,
                   features[keep,
                            colnames(features) %in% make.names(samples),
                            drop = FALSE])

  # for each msobject in the msbatch
  isotopes <- NULL
  for (m in seq_along(msbatch$msobjects)){
    results$int <- results[,colnames(results) == make.names(samples[m])]
    iso <- searchIsotopes(msbatch$msobjects[[m]],
                          label = label,
                          adductsTable = adductsTable,
                          ppm = ppm,
                          coelCutoff = coelCutoff,
                          results = results,
                          dbs = dbs)
    if (is.null(isotopes)){
      # compound information and intensities of the first sample
      isotopes <- iso
    } else {
      # only the intensities of the remaining samples are added
      isotopes <- cbind(isotopes, iso$int)
    }
  }
  colnames(isotopes) <- c(infoCols, samples)

  return(isotopes)
}
maialba3/LipidMS documentation built on Sept. 6, 2024, 9:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
maialba3/LipidMS
Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R
In maialba3/LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data

Defines functions searchIsotopesmsbatch searchIsotopes getInclusionList assignDB createLipidDB rtdevplot ploteicmsbatch plotticmsbatch plotLipids

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

R Package Documentation

Browse R Packages

We want your feedback!

maialba3/LipidMS Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R In maialba3/LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data

Defines functions searchIsotopesmsbatch searchIsotopes getInclusionList assignDB createLipidDB rtdevplot ploteicmsbatch plotticmsbatch plotLipids

Documented in assignDB createLipidDB getInclusionList ploteicmsbatch plotLipids plotticmsbatch rtdevplot searchIsotopes searchIsotopesmsbatch

R Package Documentation

Browse R Packages

We want your feedback!

maialba3/LipidMS
Lipid Annotation for LC-MS/MS DDA or DIA Data

R/additionalFunctions.R
In maialba3/LipidMS: Lipid Annotation for LC-MS/MS DDA or DIA Data