R/spectra_utils.R

Defines functions read.OnDiskMS.data read.InMemMSd.data read.MSdata naOmit getIsotopeCluster getderivativeIons checkQuasimolion checkOidCausality checkIps checkHypothese checkIsotopes createHypothese massDiffMatrix annotateGrp readLists setDefaultLists sampclass getPeaklist findAdducts calcPC.hcs calcCiS getEIC4Peaks getAllPeakEICs create.matrix getEICs findIsotopesPspec calcIsotopeMatrix is.wholenumber percentOutput groupval getPeaks_selection PlotMS2Spectra .getDdaMS2Scans ExtractMS2data PeakPicking_core PeakPicking_prep breaks_on_nBins descendZero filtfft imputeLinInterpol .aggregateMethod2int binYonX .groupval .feature_values .getChromPeakData .applyRtAdjustment .applyRtAdjToChromPeaks .peakIndex .getPeakGroupsRtMatrix .rawMat .narrow_rt_boundaries na.flatfill rectUnique findEqualGreaterM trimm joinOverlappingPeaks descendMinTol descendMin MSW.getRidge MSW.getLocalMaximumCWT MSW.extendLength MSW.extendNBase continuousPtsAboveThresholdIdx getLocalNoiseEstimate continuousPtsAboveThreshold creatPeakTable mSet2xcmsSet PerformPeakFiling RT.Adjust_Slave PerformPeakAlignment Densitygrouping_slave PerformPeakGrouping PeakPicking_MatchedFilter_slave PeakPicking_centWave_slave PerformPeakPicking

Documented in creatPeakTable Densitygrouping_slave ExtractMS2data mSet2xcmsSet PeakPicking_centWave_slave PeakPicking_core PeakPicking_MatchedFilter_slave PeakPicking_prep PerformPeakAlignment PerformPeakFiling PerformPeakGrouping PerformPeakPicking PlotMS2Spectra RT.Adjust_Slave

# Content of this script

# 1. raw data function - xcms 
# 2. Peaks Peaking Function - parameters optimization
# 3. MS/MS processing - (moved from preproc_utils)
# 4. Peak Annotation - CAMERA
# 5. MS File Reading - MSnbase

# --------------------1. Raw spectra processing_Section using the xcms way---------------------------------------------

#'PerformPeakPicking
#'@description PerformPeakPicking
#'@param object the raw data object read by ImportRawMSData function.
#'@param param param list generated by updateRawSpectraParam function.
#'@param BPPARAM parallel method used for data processing. Default is bpparam().
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PerformPeakPicking<-function(object,param,BPPARAM = bpparam()){
  
  object_mslevel <- MSnbase::filterMsLevel(
    MSnbase::selectFeatureData(object,
                               fcol = c(MSnbase:::.MSnExpReqFvarLabels,
                                        "centroided")), msLevel. = 1)
  
  if (length(object_mslevel) == 0)
    AddErrMsg("Empty spectra present to perform !")
  
  # Splite the MS data 
  object_mslevel <- lapply(1:length(fileNames(object_mslevel)),
                           FUN = filterFile,
                           object = object_mslevel)
  
  # Peak picking runnning - centWave mode
  if (param$Peak_method == "centWave")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_centWave_slave, # slave Function of centWave
                        param = param, BPPARAM = BPPARAM)
  
  # Peak picking runnning - MatchedFilter mode
  if (param$Peak_method == "matchedFilter")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_MatchedFilter_slave, # slave Function of MatchedFilter
                        param = param, BPPARAM = BPPARAM)
  
  
  
  if (param$Peak_method != "matchedFilter" && param$Peak_method != "centWave"){
    stop("Only 'centWave' and 'MatchedFilter' are suppoted now !")
  }
  
  # Picking Cluster
  
  ##### PEAK SUMMARY------------
  
  pks <- vector("list", length(resList))
  for (i in 1:length(resList)) {
    n_pks <- nrow(resList[[i]])
    if (is.null(n_pks))
      n_pks <- 0
    if (n_pks == 0) {
      pks[[i]] <- NULL
      warning("No peaks found in sample number ", i, ".")
    } else {
      pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
    }
    
  } 
  pks <- do.call(rbind, pks)
  
  # Make the chrompeaks embeded
  newFd <- list()
  #newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
  rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
                           seq(from = 1, length.out = nrow(pks)))
  
  newFd$chromPeaks <- pks
  newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
                                              row.names = rownames(pks))
  
  ## mSet Generation
  mSet<-list()
  mSet$msFeatureData <- newFd
  mSet$onDiskData <- object
  
  return(mSet)
  
}

#'PeakPicking_centWave_slave
#'@description PeakPicking_centWave_slave
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PeakPicking_centWave_slave <- function(x,param){
  
  if (class(x)=="OnDiskMSnExp"){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    
  } else if (class(x) == "list") { # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  
  mzs <- lapply(scan.set, MSnbase::mz)
  
  vals_per_spect <- lengths(mzs, FALSE)
  
  
  #if (any(vals_per_spect == 0))
  #  mzs <- mzs[-which(vals_per_spect == 0)];
  #  x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
  #  warning("Found empty spectra Scan. Filtered Automatically !")
  
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  valsPerSpect = vals_per_spect;
  scantime = rt;
  
  ######   centWaveCore Function
  
  valCount <- cumsum(valsPerSpect)
  scanindex <- as.integer(c(0, valCount[-length(valCount)])) ## Get index vector for C calls
  
  if (!is.double(mz))
    mz <- as.double(mz)
  if (!is.double(int))
    int <- as.double(int)
  ## Fix the mzCenterFun
  #mzCenterFun <- paste("mzCenter",
  #                     gsub(mzCenterFun, pattern = "mzCenter.",
  #                          replacement = "", fixed = TRUE), sep=".")
  
  basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
                 "into", "intb", "maxo", "sn")
  verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale",
                    "scpos", "scmin", "scmax", "lmin", "lmax")
  
  ## Peak width: seconds to scales
  peakwidth <- param$peakwidth
  scalerange <- round((peakwidth / mean(diff(scantime))) / 2)
  
  if (length(z <- which(scalerange == 0)))
    scalerange <- scalerange[-z]
  if (length(scalerange) < 1) {
    warning("No scales? Please check peak width!")
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    return(invisible(nopeaks))
  }
  
  if (length(scalerange) > 1){
    scales <- seq(from = scalerange[1], to = scalerange[2], by = 2)
  } else{
    scales <- scalerange
  }
  
  
  minPeakWidth <-  scales[1]
  noiserange <- c(minPeakWidth * 3, max(scales) * 3)
  maxGaussOverlap <- 0.5
  minPtsAboveBaseLine <- max(4, minPeakWidth - 2)
  minCentroids <- minPtsAboveBaseLine
  scRangeTol <-  maxDescOutlier <- floor(minPeakWidth / 2)
  scanrange <- c(1, length(scantime))
  
  
  ## If no ROIs are supplied then search for them.
  roiList<-list()
  if (length(roiList) == 0) {
    message("ROI searching under ", param$ppm, " ppm ... ", appendLF = FALSE)
    
    withRestarts(
      tryCatch({
        
        tmp <- capture.output(
          roiList <- .Call(C_findmzROI,
                           mz, int, scanindex,
                           as.double(c(0.0, 0.0)),
                           as.integer(scanrange),
                           as.integer(length(scantime)),
                           as.double(param$ppm * 1e-6),
                           as.integer(minCentroids),
                           as.integer(param$prefilter),
                           as.integer(param$noise),PACKAGE = "MetaboAnalystR")
          
          #roiList <- findmzROI (mz, int, scanindex,scanrange,scantime,param$ppm,
          #                      minCentroids,param$prefilter,param$noise)
          
        )
        
      },
      error = function(e){
        if (grepl("m/z sort assumption violated !", e$message)) {
          invokeRestart("fixSort")
        } else {
          simpleError(e)
        }
      }),
      fixSort = function() {
        ## Force ordering of values within spectrum by mz:
        ##  o split values into a list -> mz per spectrum, intensity per
        ##    spectrum.
        ##  o define the ordering.
        ##  o re-order the mz and intensity and unlist again.
        ## Note: the Rle split is faster than the "conventional" factor split.
        splitF <- Rle(1:length(valsPerSpect), valsPerSpect)
        mzl <- as.list(S4Vectors::split(mz, f = splitF))
        oidx <- lapply(mzl, order)
        mz <<- unlist(mapply(mzl, oidx, FUN = function(y, z) {
          return(y[z])
        }, SIMPLIFY = FALSE, USE.NAMES = FALSE), use.names = FALSE)
        int <<- unlist(mapply(as.list(split(int, f = splitF)), oidx,
                              FUN=function(y, z) {
                                return(y[z])
                              }, SIMPLIFY = FALSE, USE.NAMES = FALSE),
                       use.names = FALSE)
        rm(mzl)
        rm(splitF)
        tmp <- capture.output(
          roiList <<- .Call(C_findmzROI,
                            mz, int, scanindex,
                            as.double(c(0.0, 0.0)),
                            as.integer(scanrange),
                            as.integer(length(scantime)),
                            as.double(param$ppm * 1e-6),
                            as.integer(minCentroids),
                            as.integer(param$prefilter),
                            as.integer(param$noise),PACKAGE = "MetaboAnalystR")
        )
      }
    )
  }
  
  ## Second stage: process the ROIs
  peaklist <- list();
  Nscantime <- length(scantime)
  lf <- length(roiList)
  
  ## print('\n Detecting chromatographic peaks ... \n % finished: ')
  ## lp <- -1
  message("Detecting chromatographic peaks in ", length(roiList),
          " regions of interest ...", appendLF = FALSE)
  
  roiScales = NULL
  
  for (f in  1:lf) {
    
    
    feat <- roiList[[f]]
    N <- feat$scmax - feat$scmin + 1
    peaks <- peakinfo <- NULL
    mzrange <- c(feat$mzmin, feat$mzmax)
    sccenter <- feat$scmin[1] + floor(N/2) - 1
    scrange <- c(feat$scmin, feat$scmax)
    ## scrange + noiserange, used for baseline detection and wavelet analysis
    sr <- c(max(scanrange[1], scrange[1] - max(noiserange)),
            min(scanrange[2], scrange[2] + max(noiserange)))
    eic <- .Call(C_getEIC, mz, int, scanindex, as.double(mzrange),
                 as.integer(sr), as.integer(length(scanindex)),PACKAGE = "MetaboAnalystR")
    
    
    ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr)
    d <- eic$intensity
    td <- sr[1]:sr[2]
    scan.range <- c(sr[1], sr[2])
    ## original mzROI range
    idxs <- which(eic$scan %in% seq(scrange[1], scrange[2]))
    mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs])
    ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange)
    omz <- .Call(C_getMZ, mz, int, scanindex, as.double(mzrange),
                 as.integer(scrange), as.integer(length(scantime)))
    
    
    ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange)
    if (all(omz == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    od  <- mzROI.EIC$intensity
    otd <- mzROI.EIC$scan
    if (all(od == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    
    ## scrange + scRangeTol, used for gauss fitting and continuous
    ## data above 1st baseline detection
    ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)],
                                                     scrange[2] + scRangeTol)
    fd <- d[match(ftd, td)]
    
    ## 1st type of baseline: statistic approach
    if (N >= 10*minPeakWidth) {
      ## in case of very long mass trace use full scan range
      ## for baseline detection
      noised <- .Call(C_getEIC, mz, int, scanindex, as.double(mzrange),
                      as.integer(scanrange), as.integer(length(scanindex)))$intensity
      ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity
    } else {
      noised <- d
    }
    ## 90% trimmed mean as first baseline guess
    
    
    gz <- which(noised > 0);
    
    if (length(gz) < 3*minPeakWidth){
      noise <- mean(noised)
    } else {
      noise <- mean(noised[gz], trim=0.05)
    }
    
    firstBaselineCheck = TRUE
    ## any continuous data above 1st baseline ?
    if (firstBaselineCheck &
        !continuousPtsAboveThreshold(fd, threshold = noise,
                                     num = minPtsAboveBaseLine))
      next
    ## 2nd baseline estimate using not-peak-range
    lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime,
                                    threshold = noise,
                                    num = minPtsAboveBaseLine)
    ## Final baseline & Noise estimate
    baseline <- max(1, min(lnoise[1], noise))
    sdnoise <- max(1, lnoise[2])
    sdthr <-  sdnoise * param$snthresh
    ## is there any data above S/N * threshold ?
    if (!(any(fd - baseline >= sdthr)))
      next
    wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh')
    if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr)))
      next
    if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise
      wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99
    localMax <- MSW.getLocalMaximumCWT(wCoefs)
    rL <- MSW.getRidge(localMax)
    wpeaks <- sapply(rL,
                     function(x) {
                       w <- min(1:length(x),ncol(wCoefs))
                       any(wCoefs[x,w]- baseline >= sdthr)
                     })
    if (any(wpeaks)) {
      wpeaksidx <- which(wpeaks)
      ## check each peak in ridgeList
      for (p in 1:length(wpeaksidx)) {
        opp <- rL[[wpeaksidx[p]]]
        pp <- unique(opp)
        if (length(pp) >= 1) {
          dv <- td[pp] %in% ftd
          if (any(dv)) { ## peaks in orig. data range
            ## Final S/N check
            if (any(d[pp[dv]]- baseline >= sdthr)) {
              ## if(!is.null(roiScales)) {
              ## allow roiScales to be a numeric of length 0
              if(length(roiScales) > 0) {
                ## use given scale
                best.scale.nr <- which(scales == roiScales[[f]])
                if(best.scale.nr > length(opp))
                  best.scale.nr <- length(opp)
              } else {
                ## try to decide which scale describes the peak best
                inti <- numeric(length(opp))
                irange <- rep(ceiling(scales[1]/2), length(opp))
                for (k in 1:length(opp)) {
                  kpos <- opp[k]
                  r1 <- ifelse(kpos - irange[k] > 1,
                               kpos-irange[k], 1)
                  r2 <- ifelse(kpos + irange[k] < length(d),
                               kpos + irange[k], length(d))
                  inti[k] <- sum(d[r1:r2])
                }
                maxpi <- which.max(inti)
                if (length(maxpi) > 1) {
                  m <- wCoefs[opp[maxpi], maxpi]
                  bestcol <- which(m == max(m),
                                   arr.ind = TRUE)[2]
                  best.scale.nr <- maxpi[bestcol]
                } else  best.scale.nr <- maxpi
              }
              
              best.scale <-  scales[best.scale.nr]
              best.scale.pos <- opp[best.scale.nr]
              
              pprange <- min(pp):max(pp)
              ## maxint <- max(d[pprange])
              lwpos <- max(1,best.scale.pos - best.scale)
              rwpos <- min(best.scale.pos + best.scale, length(td))
              p1 <- match(td[lwpos], otd)[1]
              p2 <- match(td[rwpos], otd)
              p2 <- p2[length(p2)]
              if (is.na(p1)) p1 <- 1
              if (is.na(p2)) p2 <- N
              mz.value <- omz[p1:p2]
              mz.int <- od[p1:p2]
              maxint <- max(mz.int)
              
              ## re-calculate m/z value for peak range
              mzrange <- range(mz.value)
              #mzmean <- do.call(mzCenterFun,
              #                  list(mz = mz.value,
              #                       intensity = mz.int))
              mzmean <- weighted.mean(mz.value, mz.int)
              ## Compute dppm only if needed
              dppm <- NA
              if (param$verboseColumns) {
                if (length(mz.value) >= (minCentroids + 1)) {
                  dppm <- round(min(running(abs(diff(mz.value)) /
                                              (mzrange[2] *  1e-6),
                                            fun = max,
                                            width = minCentroids)))
                } else {
                  dppm <- round((mzrange[2] - mzrange[1]) /
                                  (mzrange[2] * 1e-6))
                }
              }
              peaks <- rbind(peaks,
                             c(mzmean,mzrange, ## mz
                               NA, NA, NA,     ## rt, rtmin, rtmax,
                               NA,             ## intensity (sum)
                               NA,             ## intensity (-bl)
                               maxint,         ## max intensity
                               round((maxint - baseline) / sdnoise),  ##  S/N Ratio
                               NA,             ## Gaussian RMSE
                               NA,NA,NA,       ## Gaussian Parameters
                               f,              ## ROI Position
                               dppm,           ## max. difference between the [minCentroids] peaks in ppm
                               best.scale,     ## Scale
                               td[best.scale.pos],
                               td[lwpos],
                               td[rwpos],  ## Peak positions guessed from the wavelet's (scan nr)
                               NA, NA))                    ## Peak limits (scan nr)
              peakinfo <- rbind(peakinfo,
                                c(best.scale, best.scale.nr,
                                  best.scale.pos, lwpos, rwpos))
              ## Peak positions guessed from the wavelet's
            }
          }
        }
      }  ##for
    } ## if
    
    ##  postprocessing
    if (!is.null(peaks)) {
      colnames(peaks) <- c(basenames, verbosenames)
      colnames(peakinfo) <- c("scale", "scaleNr", "scpos",
                              "scmin", "scmax")
      for (p in 1:dim(peaks)[1]) {
        ## find minima (peak boundaries), assign rt and intensity values
        if (param$integrate == 1) {
          lm <- descendMin(wCoefs[, peakinfo[p, "scaleNr"]],
                           istart = peakinfo[p, "scpos"])
          gap <- all(d[lm[1]:lm[2]] == 0) # looks like we got stuck in a gap right in the middle of the peak
          if ((lm[1] == lm[2]) || gap)   # fall-back
            lm <- descendMinTol(
              d, startpos = c(peakinfo[p, "scmin"],
                              peakinfo[p, "scmax"]),
              maxDescOutlier)
        } else {
          lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"],
                                              peakinfo[p, "scmax"]),
                              maxDescOutlier)
        }
        ## Narrow peak rt boundaries by removing values below threshold
        lm <- .narrow_rt_boundaries(lm, d)
        lm_seq <- lm[1]:lm[2]
        pd <- d[lm_seq]
        
        peakrange <- td[lm]
        peaks[p, "rtmin"] <- scantime[peakrange[1]]
        peaks[p, "rtmax"] <- scantime[peakrange[2]]
        peaks[p, "maxo"] <- max(pd)
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        if (is.na(pwid))
          pwid <- 1
        peaks[p, "into"] <- pwid * sum(pd)
        db <- pd - baseline
        peaks[p, "intb"] <- pwid * sum(db[db > 0])
        peaks[p, "lmin"] <- lm[1]
        peaks[p, "lmax"] <- lm[2]
        
        if (param$fitgauss) {
          ## perform gaussian fits, use wavelets for inital parameters
          td_lm <- td[lm_seq]
          md <- max(pd)
          d1 <- pd / md ## normalize data for gaussian error calc.
          pgauss <- fitGauss(td_lm, pd,
                             pgauss = list(mu = peaks[p, "scpos"],
                                           sigma = peaks[p, "scmax"] -
                                             peaks[p, "scmin"],
                                           h = peaks[p, "maxo"]))
          
          
          rtime <- peaks[p, "scpos"]
          if (!any(is.na(pgauss)) && all(pgauss > 0)) {
            gtime <- td[match(round(pgauss$mu), td)]
            if (!is.na(gtime)) {
              rtime <- gtime
              peaks[p, "mu"] <- pgauss$mu
              peaks[p, "sigma"] <- pgauss$sigma
              peaks[p, "h"] <- pgauss$h
              peaks[p,"egauss"] <- sqrt(
                (1 / length(td_lm)) *
                  sum(((d1 - gauss(td_lm, pgauss$h / md,
                                   pgauss$mu, pgauss$sigma))^2)))
            }
          }
          peaks[p, "rt"] <- scantime[rtime]
          ## avoid fitting side effects
          if (peaks[p, "rt"] < peaks[p, "rtmin"])
            peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
        } else
          peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
      }
      peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime,
                                    scan.range, peaks, maxGaussOverlap)
    }
    if (!is.null(peaks)) {
      peaklist[[length(peaklist) + 1]] <- peaks
    }
  } ## f
  

  if (length(peaklist) == 0) {
    warning("No peaks found!")
    
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    message(" FAIL: none found!")
    return(nopeaks)
  }
  
  p <- do.call(rbind, peaklist)
  

  if (!param$verboseColumns)
    p <- p[, basenames, drop = FALSE]
  
  uorder <- order(p[, "into"], decreasing = TRUE)
  pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE])
  uindex <- rectUnique(pm, uorder, param$mzdiff, ydiff = -0.00001) ## allow adjacent peaks
  pr <- p[uindex, , drop = FALSE]
  message(" OK: ", nrow(pr), " found.")
  
  return(pr)
  
}

#'PeakPicking_MatchedFilter_slave
#'@description PeakPicking_MatchedFilter_slave
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PeakPicking_MatchedFilter_slave <- function(x,param){
  
  if (class(x)=="OnDiskMSnExp"){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    
  } else if (class(x) == "list") { # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  ## 1. data & Param preparation
  mzs <- lapply(scan.set, MSnbase::mz)
  valsPerSpect <- lengths(mzs, FALSE)
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  scantime = rt;
  mrange <- range(mz[mz > 0])
  mass <- seq(floor(mrange[1] / param$binSize) * param$binSize,
              ceiling(mrange[2] / param$binSize) * param$binSize,
              by = param$binSize)
  impute <- param$impute;
  baseValue <- param$baseValue;
  distance <- param$distance;
  steps <- param$steps;
  sigma <- param$sigma;
  snthresh <-param$snthresh;
  max <- param$max;
  mzdiff <- param$mzdiff;
  binSize <- param$binSize;
  index <- param$index;
  
  
  ## 2. Binning the data.
  ## Create and translate settings for binYonX
  toIdx <- cumsum(valsPerSpect)
  fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
  shiftBy = TRUE
  binFromX <- min(mass)
  binToX <- max(mass)
  ## binSize <- (binToX - binFromX) / (length(mass) - 1)
  ## brks <- seq(binFromX - binSize/2, binToX + binSize/2, by = binSize)
  brks <- breaks_on_nBins(fromX = binFromX, toX = binToX,
                          nBins = length(mass), shiftByHalfBinSize = TRUE)
  binRes <- binYonX(mz, int,
                    breaks = brks,
                    fromIdx = fromIdx,
                    toIdx = toIdx,
                    baseValue = ifelse(impute == "none", yes = 0, no = NA),
                    sortedX = TRUE,
                    returnIndex = TRUE
  )
  
  
  if (length(toIdx) == 1)
    binRes <- list(binRes)
  bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index)))
  bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1]
  ## Missing value imputation
  if (missing(baseValue))
    baseValue <- numeric()
  if (length(baseValue) == 0)
    baseValue <- min(int, na.rm = TRUE) / 2
  if (missing(distance))
    distance <- numeric()
  if (length(distance) == 0)
    distance <- floor(0.075 / bin_size)
  
  binVals <- lapply(binRes, function(z) {
    return(imputeLinInterpol(z$y, method = impute, distance = distance,
                             noInterpolAtEnds = TRUE,
                             baseValue = baseValue))
  })
  
  buf <- do.call(cbind, binVals)
  
  
  
  bufidx <- 1L:length(mass)
  lookahead <- steps-1
  lookbehind <- 1
  
  N <- nextn(length(scantime))
  xrange <- range(scantime)
  x <- c(0:(N/2), -(ceiling(N/2-1)):-1) *
    (xrange[2]-xrange[1])/(length(scantime)-1)
  
  filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi)) *
                              exp(-x^2/(2*sigma^2)), "x")), "hessian")
  filt <- filt/sqrt(sum(filt^2))
  filt <- fft(filt, inverse = TRUE)/length(filt)
  
  cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf",
              "maxo", "maxf", "i", "sn")
  num <- 0
  ResList <- list()
  
  ## Can not do much here, lapply/apply won't work because of the 'steps' parameter.
  ## That's looping through the masses, i.e. rows of the profile matrix.
  for (i in seq(length = length(mass)-steps+1)) {
    ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE]
    ysums <- colMax(ymat)
    yfilt <- filtfft(ysums, filt)
    gmax <- max(yfilt)
    for (j in seq(length = max)) {
      maxy <- which.max(yfilt)
      noise <- mean(ysums[ysums > 0])
      ##noise <- mean(yfilt[yfilt >= 0])
      sn <- yfilt[maxy]/noise
      if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) {
        peakrange <- descendZero(yfilt, maxy)
        intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE]
        mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)],
                                  peakrange[1]:peakrange[2]]],
                        nrow = steps)
        which.intMax <- which.colMax(intmat)
        mzmat <- mzmat[which.intMax]
        if (all(is.na(mzmat))) {
          yfilt[peakrange[1]:peakrange[2]] <- 0
          next
        }
        mzrange <- range(mzmat, na.rm = TRUE)
        massmean <- weighted.mean(mzmat, intmat[which.intMax],
                                  na.rm = TRUE)
        ## This case (the only non-na m/z had intensity 0) was reported
        ## by Gregory Alan Barding "binlin processing"
        if(any(is.na(massmean))) {
          massmean <- mean(mzmat, na.rm = TRUE)
        }
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        into <- pwid*sum(ysums[peakrange[1]:peakrange[2]])
        intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]])
        maxo <- max(ysums[peakrange[1]:peakrange[2]])
        maxf <- yfilt[maxy]
        
        yfilt[peakrange[1]:peakrange[2]] <- 0
        num <- num + 1
        ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy,
                            peakrange, into, intf, maxo, maxf, j, sn)
      } else
        break
    }
  }
  if (length(ResList) == 0) {
    rmat <- matrix(nrow = 0, ncol = length(cnames))
    colnames(rmat) <- cnames
    return(rmat)
  }
  rmat <- do.call(rbind, ResList)
  if (is.null(dim(rmat))) {
    rmat <- matrix(rmat, nrow = 1)
  }
  colnames(rmat) <- cnames
  max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize)
  if (index){
    mzdiff <- mzdiff/binSize
  }  else {
    rmat[, "rt"] <- scantime[rmat[, "rt"]]
    rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]]
    rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]]
  }
  ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak
  ## and report that.
  uorder <- order(rmat[, "into"], decreasing = TRUE)
  uindex <- rectUnique(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                            drop = FALSE],
                       uorder, mzdiff)
  rmat <- rmat[uindex,,drop = FALSE]
  
  return(rmat)
  
}

#'PerformPeakGrouping
#'@description PerformPeakGrouping
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PerformPeakGrouping<-function(mSet,param){
  
  ## 1. Extract Information-------
  peaks_0 <- mSet[["msFeatureData"]][["chromPeaks"]]
  
  peaks <- cbind(peaks_0[, c("mz", "rt", "sample", "into"), drop = FALSE],
                 index = seq_len(nrow(peaks_0)))
  
  if (!is.null(mSet[["onDiskData"]])){
    sample_group<-as.character(mSet[["onDiskData"]]@phenoData@data[["sample_group"]]);
    
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet[["onDiskData"]]@phenoData@data[["sample_name"]]);
    }
    
  } else {
    sample_group<-as.character(mSet[["inMemoryData"]]@phenoData@data[["sample_group"]]);
    
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet[["inMemoryData"]]@phenoData@data[["sample_name"]]);
    }
  }
  
  
  sampleGroupNames <- unique(sample_group)
  sampleGroupTable <- table(sample_group)
  nSampleGroups <- length(sampleGroupTable)
  
  ## 2. Order peaks matrix by mz-------
  peaks <- peaks[order(peaks[, "mz"]), , drop = FALSE]
  rownames(peaks) <- NULL
  rtRange <- range(peaks[, "rt"])
  
  ## 3. Define the mass slices and the index in the peaks matrix with an mz-------
  mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + param$binSize,
              by = param$binSize / 2)
  masspos <- findEqualGreaterM(peaks[, "mz"], mass)
  
  densFrom <- rtRange[1] - 3 * param$bw
  densTo <- rtRange[2] + 3 * param$bw
  
  
  ## 4. Increase the number of sampling points for the density distribution.-------
  densN <- max(512, 2 * 2^(ceiling(log2(diff(rtRange) / (param$bw / 2)))))
  endIdx <- 0
  message("Total of ", length(mass) - 1, " slices detected for processing... ",
          appendLF = FALSE)
  resL <- vector("list", (length(mass) - 2))
  
  for (i in seq_len(length(mass)-2)) {
    ## That's identifying overlapping mz slices.
    startIdx <- masspos[i]
    endIdx <- masspos[i + 2] - 1
    if (endIdx - startIdx < 0)
      next
    resL[[i]] <- Densitygrouping_slave(peaks[startIdx:endIdx, , drop = FALSE],
                                       bw = param$bw, densFrom = densFrom,
                                       densTo = densTo, densN = densN,
                                       sampleGroups = sample_group,
                                       sampleGroupTable = sampleGroupTable,
                                       minFraction = param$minFraction,
                                       minSamples = param$minSamples,
                                       maxFeatures = param$maxFeatures)
  }
  
  message("Done !")
  res <- do.call(rbind, resL)
  
  if (nrow(res)) {
    ## Remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(
      as.matrix(res[, (match("npeaks", colnames(res)) +1):(ncol(res) -1),
                    drop = FALSE]))
    uorder <- order(-numsamp, res[, "npeaks"])
    
    uindex <- rectUnique(
      as.matrix(res[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                    drop = FALSE]), uorder)
    res <- res[uindex, , drop = FALSE]
    rownames(res) <- NULL
  }
  
  df <- S4Vectors::DataFrame(res)
  rownames(df) <- sprintf(paste0("FT", "%0", ceiling(log10(nrow(df) + 1L)), "d"),
                          seq(from = 1, length.out = nrow(df)))
  
  
  mSet$FeatureGroupTable <- df
  
  mSet
}

#'Densitygrouping_slave
#'@description Densitygrouping_slave
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
Densitygrouping_slave <- function(x, bw, densFrom, densTo, densN, sampleGroups,
                                  sampleGroupTable, minFraction,
                                  minSamples, maxFeatures) {
  den <- density(x[, "rt"], bw = bw, from = densFrom, to = densTo,
                 n = densN)
  maxden <- max(den$y)
  deny <- den$y
  sampleGroupNames <- names(sampleGroupTable)
  nSampleGroups <- length(sampleGroupNames)
  col_nms <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
               "npeaks", sampleGroupNames)
  res_mat <- matrix(nrow = 0, ncol = length(col_nms),
                    dimnames = list(character(), col_nms))
  res_idx <- list()
  while (deny[maxy <- which.max(deny)] > maxden / 20 && nrow(res_mat) <
         maxFeatures) {
    grange <- descendMin(deny, maxy)
    deny[grange[1]:grange[2]] <- 0
    gidx <- which(x[,"rt"] >= den$x[grange[1]] &
                    x[,"rt"] <= den$x[grange[2]])
    ## Determine the sample group of the samples in which the peaks
    ## were detected and check if they correspond to the required limits.
    tt <- table(sampleGroups[unique(x[gidx, "sample"])])
    if (!any(tt / sampleGroupTable[names(tt)] >= minFraction &
             tt >= minSamples))
      next
    gcount <- rep(0, length(sampleGroupNames))
    names(gcount) <- sampleGroupNames
    gcount[names(tt)] <- as.numeric(tt)
    res_mat <- rbind(res_mat,
                     c(median(x[gidx, "mz"]),
                       range(x[gidx, "mz"]),
                       median(x[gidx, "rt"]),
                       range(x[gidx, "rt"]),
                       length(gidx),
                       gcount)
    )
    res_idx <- c(res_idx, list(unname(sort(x[gidx, "index"]))))
  }
  
  res <- as.data.frame(res_mat)
  res$peakidx <- res_idx
  res
}

#'PerformPeakAlignment
#'@description PerformPeakAlignment
#'@param mSet the mSet object generated by PerformPeakPicking function.
#'@param param param list generated by updateRawSpectraParam function.
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PerformPeakAlignment<-function(mSet,param){
  ## 1~4. Peak Grouping---------
  mSet<-PerformPeakGrouping(mSet,param)
  ## 5. Adjust Retention Time-------
  
  # 1). Group Matrix-------
  peaks_0 <- mSet[["msFeatureData"]][["chromPeaks"]]
  
  if (!is.null(mSet[["onDiskData"]])){
    subs <- seq_along(mSet[["onDiskData"]]@phenoData@data[["sample_name"]]);
    format <- "onDiskData"
  } else {
    subs <- seq_along(mSet[["inMemoryData"]]@phenoData@data[["sample_name"]]);
    format <- "inMemoryData"
  }
  nSamples <- length(subs)
  
  subset_names <- original_names <- mSet[["msFeatureData"]][["chromPeakData"]]@rownames
  mSet[["FeatureGroupTable"]]$peakidx <- lapply(mSet[["FeatureGroupTable"]]$peakidx, function(z) {
    idx <- base::match(original_names[z], subset_names)
    idx[!is.na(idx)]
  })
  peakIndex_0 <- mSet[["FeatureGroupTable"]][lengths(mSet[["FeatureGroupTable"]]$peakidx) > 0, ]
  peakIndex <- .peakIndex(peakIndex_0)
  
  pkGrpMat <- .getPeakGroupsRtMatrix(peaks = peaks_0,
                                     peakIndex = peakIndex,
                                     sampleIndex = subs,
                                     missingSample = nSamples - (nSamples * param$minFraction),
                                     extraPeaks = param$extraPeaks
  )
  
  colnames(pkGrpMat) <- mSet[[format]]@phenoData@data[["sample_name"]][subs]
  
  # 2). RT adjustment-------
  rtime_0 <- rtime(mSet[[format]])
  
  tmp <- split(rtime_0, fromFile(mSet[[format]]))
  rtime <- vector("list", length(fileNames(mSet[[format]])))
  names(rtime) <- as.character(1:length(rtime))
  rtime[as.numeric(names(tmp))] <- tmp
  
  res <- RT.Adjust_Slave(peaks_0,
                         peakIndex = peakIndex_0$peakidx,
                         rtime = rtime,
                         minFraction = param$minFraction,
                         extraPeaks = param$extraPeaks,
                         smooth = param$smooth,
                         span = param$span,
                         family = param$family,
                         peakGroupsMatrix = pkGrpMat,
                         subsetAdjust = param$subsetAdjust
  )
  
  message("Applying retention time adjustment to the identified chromatographic peaks ... ",appendLF = FALSE)
  
  fts <- .applyRtAdjToChromPeaks(mSet[["msFeatureData"]][["chromPeaks"]],
                                 rtraw = rtime,
                                 rtadj = res)
  mSet[["msFeatureData"]][["chromPeaks"]] <- fts
  mSet[["msFeatureData"]][["adjustedRT"]] <- res
  # 
  message("Done !")
  
  ## 6~9. Peak Grouping Again---------
  mSet<-PerformPeakGrouping(mSet,param)
  ## 10. Return------
  return(mSet)
}

#'RT.Adjust_Slave
#'@description RT.Adjust_Slave
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
RT.Adjust_Slave <-
  function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1,
           smooth = c("loess", "linear"), span = 0.2,
           family = c("gaussian", "symmetric"),
           peakGroupsMatrix = matrix(ncol = 0, nrow = 0),
           subsetAdjust = c("average", "previous"))  {
    ## minFraction
    if (any(minFraction > 1) | any(minFraction < 0))
      stop("'minFraction' has to be between 0 and 1!")
    ## Check peakIndex:
    if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks)))))
      stop("Some indices listed in 'peakIndex' are outside of ",
           "1:nrow(peaks)!")
    ## Check rtime:
    if (!is.list(rtime))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE)))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (length(rtime) != max(peaks[, "sample"]))
      stop("The length of 'rtime' does not match with the total number of ",
           "samples according to the 'peaks' matrix!")
    total_samples <- length(rtime)
    subset <- seq_len(total_samples)
    ## Translate minFraction to number of allowed missing samples.
    nSamples <- length(subset)
    missingSample <- nSamples - (nSamples * minFraction)
    ## Remove peaks not present in "subset" from the peakIndex
    peaks_in_subset <- which(peaks[, "sample"] %in% subset)
    peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset])
    ## Check if we've got a valid peakGroupsMatrix
    ## o Same number of samples.
    ## o range of rt values is within the rtime.
    if (nrow(peakGroupsMatrix)) {
      if (ncol(peakGroupsMatrix) != nSamples)
        stop("'peakGroupsMatrix' has to have the same number of columns ",
             "as there are samples!")
      pg_range <- range(peakGroupsMatrix, na.rm = TRUE)
      rt_range <- range(rtime)
      if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2]))
        stop("The retention times in 'peakGroupsMatrix' have to be within",
             " the retention time range of the experiment!")
      rt <- peakGroupsMatrix
    } else
      rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset,
                                   missingSample, extraPeaks)
    if (ncol(rt) != length(subset))
      stop("Length of 'subset' and number of columns of the peak group ",
           "matrix do not match.")
    ## Fix for issue #175
    if (length(rt) == 0)
      stop("No peak groups found in the data for the provided settings")
    message("Performing retention time correction using ", nrow(rt),
            " peak groups.")
    
    ## Calculate the deviation of each peak group in each sample from its
    ## median
    rtdev <- rt - apply(rt, 1, median, na.rm = TRUE)
    
    if (smooth == "loess") {
      mingroups <- min(colSums(!is.na(rt)))
      if (mingroups < 4) {
        smooth <- "linear"
        warning("Too few peak groups for 'loess', reverting to linear",
                " method")
      } else if (mingroups * span < 4) {
        span <- 4 / mingroups
        warning("Span too small for 'loess' and the available number of ",
                "peak groups, resetting to ", round(span, 2))
      }
    }
    
    rtdevsmo <- vector("list", nSamples)
    
    ## Code for checking to see if retention time correction is overcorrecting
    rtdevrange <- range(rtdev, na.rm = TRUE)
    warn.overcorrect <- FALSE
    warn.tweak.rt <- FALSE
    
    rtime_adj <- rtime
    ## Adjust samples in subset.
    for (i in seq_along(subset)) {
      i_all <- subset[i]              # Index of sample in whole dataset.
      pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i]))
      
      ## order the data.frame such that rt and rtdev are increasingly ordered.
      pk_idx <- order(pts$rt, pts$rtdev)
      pts <- pts[pk_idx, ]
      if (smooth == "loess") {
        lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span,
                                     degree = 1, family = family))
        
        rtdevsmo[[i]] <- na.flatfill(
          predict(lo, data.frame(rt = rtime[[i_all]])))
        ## Remove singularities from the loess function
        rtdevsmo[[i]][abs(rtdevsmo[[i]]) >
                        quantile(abs(rtdevsmo[[i]]), 0.9,
                                 na.rm = TRUE) * 2] <- NA
        if (length(naidx <- which(is.na(rtdevsmo[[i]]))))
          rtdevsmo[[i]][naidx] <- suppressWarnings(
            approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])),
                   xout = rtime[[i_all]][naidx], rule = 2)$y
          )
        
        ## Check if there are adjusted retention times that are not ordered
        ## increasingly. If there are, search for each first unordered rt
        ## the next rt that is larger and linearly interpolate the values
        ## in between (see issue #146 for an illustration).
        while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) {
          warn.tweak.rt <- TRUE  ## Warn that we had to tweak the rts.
          rtadj <- rtime[[i_all]] - rtdevsmo[[i]]
          rtadj_start <- rtadj[decidx[1]] ## start interpolating from here
          ## Define the
          next_larger <- which(rtadj > rtadj[decidx[1]])
          if (length(next_larger) == 0) {
            ## Fix if there is no larger adjusted rt up to the end.
            next_larger <- length(rtadj) + 1
            rtadj_end <- rtadj_start
          } else {
            next_larger <- min(next_larger)
            rtadj_end <- rtadj[next_larger]
          }
          ## linearly interpolate the values in between.
          adj_idxs <- (decidx[1] + 1):(next_larger - 1)
          incr <- (rtadj_end - rtadj_start) / length(adj_idxs)
          rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] -
            (rtadj_start + (1:length(adj_idxs)) * incr)
        }
        
        rtdevsmorange <- range(rtdevsmo[[i]])
        if (any(rtdevsmorange / rtdevrange > 2))
          warn.overcorrect <- TRUE
      } else {
        if (nrow(pts) < 2) {
          stop("Not enough peak groups even for linear smoothing ",
               "available!")
        }
        ## Use lm instead?
        fit <- lsfit(pts$rt, pts$rtdev)
        rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1]
        ptsrange <- range(pts$rt)
        minidx <- rtime[[i_all]] < ptsrange[1]
        maxidx <- rtime[[i_all]] > ptsrange[2]
        rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)]
        rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)]
      }
      ## Finally applying the correction
      rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]]
    }
    
    if (warn.overcorrect) {
      warning("Fitted retention time deviation curves exceed points by more",
              " than 2x. This is dangerous and the algorithm is probably ",
              "overcorrecting your data. Consider increasing the span ",
              "parameter or switching to the linear smoothing method.")
    }
    
    if (warn.tweak.rt) {
      warning(call. = FALSE, "Adjusted retention times had to be ",
              "re-adjusted for some files to ensure them being in the same",
              " order than the raw retention times. A call to ",
              "'dropAdjustedRtime' might thus fail to restore retention ",
              "times of chromatographic peaks to their original values. ",
              "Eventually consider to increase the value of the 'span' ",
              "parameter.")
    }
    rtime_adj
  }

#'PerformPeakFiling
#'@description PerformPeakFiling
#'@param mSet the mSet object generated by PerformPeakPicking function.
#'@param param param list generated by updateRawSpectraParam function.
#'@param BPPARAM parallel method used for data processing. Default is bpparam().
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
PerformPeakFiling<-function(mSet,param,BPPARAM=bpparam()){
  
  ## Preparing Something
  print("Starting peak filling!")
  fixedRt <- fixedMz <- expandRt <- expandMz <- 0
  
  if (is.null(param$ppm)){
    ppm <-10
  } else {
    ppm <- param$ppm;
  }
  
  message("Defining peak areas for filling-in...",
          appendLF = FALSE)
  
  aggFunLow <- median
  aggFunHigh <- median
  
  tmp_pks <- mSet$msFeatureData$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")]
  fdef <- mSet$FeatureGroupTable
  
  pkArea <- do.call(rbind,lapply(fdef$peakidx, function(z) {
    pa <- c(aggFunLow(tmp_pks[z, 1]),
            aggFunHigh(tmp_pks[z, 2]),
            aggFunLow(tmp_pks[z, 3]),
            aggFunHigh(tmp_pks[z, 4]))
    ## Check if we have to apply ppm replacement:
    if (ppm != 0) {
      mzmean <- mean(pa[3:4])
      tittle <- mzmean * (ppm / 2) / 1E6
      if ((pa[4] - pa[3]) < (tittle * 2)) {
        pa[3] <- mzmean - tittle
        pa[4] <- mzmean + tittle
      }
    }
    ## Expand it.
    if (expandRt != 0) {
      diffRt <- (pa[2] - pa[1]) * expandRt / 2
      pa[1] <- pa[1] - diffRt
      pa[2] <- pa[2] + diffRt
    }
    if (expandMz != 0) {
      diffMz <- (pa[4] - pa[3]) * expandMz / 2
      pa[3] <- pa[3] - diffMz
      pa[4] <- pa[4] + diffMz
    }
    if (fixedMz != 0) {
      pa[3] <- pa[3] - fixedMz
      pa[4] <- pa[4] + fixedMz
    }
    if (fixedRt != 0) {
      pa[1] <- pa[1] - fixedRt
      pa[2] <- pa[2] + fixedRt
    }
    pa
  }))
  
  rm(tmp_pks)
  message(".", appendLF = FALSE)
  colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
  ## Add mzmed column - needed for MSW peak filling.
  pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea,
                  mzmed = as.numeric(fdef$mzmed))
  
  if (class(mSet[[2]]) == "OnDiskMSnExp"){
    format <- "onDiskData"
  } else {
    format <- "inMemoryData"
  }
  
  fNames <- mSet[[format]]@phenoData@data[["sample_name"]]
  pks <- mSet$msFeatureData$chromPeaks
  
  pkGrpVal <-
    .feature_values(pks = pks, fts = mSet$FeatureGroupTable,
                    method = "medret", value = "into",
                    intensity = "into", colnames = fNames,
                    missing = NA)
  
  message(".", appendLF = FALSE)
  ## Check if there is anything to fill...
  if (!any(is.na(rowSums(pkGrpVal)))) {
    message("No missing peaks present.")
    return(mSet)
  }
  message(".", appendLF = FALSE)
  ## Split the object by file and define the peaks for which
  pkAreaL <- objectL <- vector("list", length(fNames))
 
  
  ## We need "only" a list of OnDiskMSnExp, one for each file but
  ## instead of filtering by file we create small objects to keep
  ## memory requirement to a minimum.
  if (format == "onDiskData"){
    
    req_fcol <- requiredFvarLabels("OnDiskMSnExp")
    min_fdata <- mSet[[format]]@featureData@data[, req_fcol]
    
  ###
    res <- mSet[["msFeatureData"]][["adjustedRT"]]
    res <- unlist(res, use.names = FALSE)
    
    fidx <- fData(mSet[[format]])$fileIdx # a issue for inmemory data
    names(fidx) <- featureNames(mSet[[format]])
    sNames <- unlist(split(featureNames(mSet[[format]]), fidx),
                     use.names = FALSE)
    
    names(res) <- sNames
    res <- res[featureNames(mSet[[format]])]
    
    min_fdata$retentionTime <- res
    
    for (i in 1:length(mSet[[format]]@phenoData@data[["sample_name"]])) {
      
      fd <- min_fdata[min_fdata$fileIdx == i, ]
      fd$fileIdx <- 1
      
      objectL[[i]] <- new(
        "OnDiskMSnExp",
        processingData = new("MSnProcess",
                             files = mSet[[format]]@processingData@files[i]),
        featureData = new("AnnotatedDataFrame", fd),
        phenoData = new("NAnnotatedDataFrame",
                        data.frame(sampleNames = "1")),
        experimentData = new("MIAPE",
                             instrumentManufacturer = "a",
                             instrumentModel = "a",
                             ionSource = "a",
                             analyser = "a",
                             detectorType = "a"))
      
      ## Want to extract intensities only for peaks that were not
      ## found in a sample.
      
      pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE]
    }
    
    message(" OK\nStart integrating peak areas from original files")
    
    cp_colnames <- colnames(mSet[["msFeatureData"]][["chromPeaks"]])
    
    ## Extraction designed for centWave
    res <- bpmapply(FUN = .getChromPeakData, 
                    objectL,
                    pkAreaL, 
                    as.list(1:length(objectL)),
                    MoreArgs = list(cn = cp_colnames,
                                    mzCenterFun = "weighted.mean"),
                    BPPARAM = BPPARAM, 
                    SIMPLIFY = FALSE)
    
 
  } else {
  
    message(" OK\nStart integrating peak areas from original files")
    
    mzCenterFun = "weighted.mean"
    
    scan_set<-names(mSet[[format]]@assayData);
    scan_set_ordered <- sort(scan_set);
    
    assayData <- sapply(scan_set_ordered, FUN = function(x){mSet[[format]]@assayData[[x]]})
    
    #assayData <- lapply(scan_set,FUN=function(x){mSet[[format]]@assayData[[x]]});
    assayData <- split(assayData,fromFile(mSet[[format]]));
    
    
    rtim_all <- split(rtime(mSet[[format]]),fromFile(mSet[[format]]))
    filesname <- basename(MSnbase::fileNames(mSet[[format]]))
    res_new <-list()
    
    for (ii in 1:length(mSet[[format]]@phenoData@data[["sample_name"]])){
      
      cn <-colnames(mSet[["msFeatureData"]][["chromPeaks"]])
      peakArea <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
      
      ncols <- length(cn)
      res <- matrix(ncol = ncols, nrow = nrow(peakArea))
      colnames(res) <- cn
      
      res[, "sample"] <- ii
      res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")]
      ## Load the data
      message("Requesting ", nrow(res), " peaks from ",
              filesname[ii], " ... ", appendLF = FALSE)
      
      spctr <- assayData[[ii]]
      
      mzs <- lapply(spctr, mz)
      valsPerSpect <- lengths(mzs)
      ints <- unlist(lapply(spctr, intensity), use.names = FALSE)
      rm(spctr)
      
      mzs <- unlist(mzs, use.names = FALSE)
      mzs_range <- range(mzs)
      rtim <- rtim_all[[ii]]
      rtim_range <- range(rtim)
      
      for (i in seq_len(nrow(res))) {
        rtr <- peakArea[i, c("rtmin", "rtmax")]
        mzr <- peakArea[i, c("mzmin", "mzmax")]
        ## If the rt range is completely out; additional fix for #267
        if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
            mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
          res[i, ] <- rep(NA_real_, ncols)
          next
        }
        ## Ensure that the mz and rt region is within the range of the data.
        rtr[1] <- max(rtr[1], rtim_range[1])
        rtr[2] <- min(rtr[2], rtim_range[2])
        mzr[1] <- max(mzr[1], mzs_range[1])
        mzr[2] <- min(mzr[2], mzs_range[2])
        mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
                                        valsPerSpect = valsPerSpect, rtrange = rtr,
                                        mzrange = mzr)
        if (length(mtx)) {
          if (any(!is.na(mtx[, 3]))) {
            
            res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
              ((rtr[2] - rtr[1]) /
                 max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
            maxi <- which.max(mtx[, 3])
            res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
            res[i, c("rtmin", "rtmax")] <- rtr
            ## Calculate the intensity weighted mean mz
            meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
            if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
            res[i, "mz"] <- meanMz
          } else {
            res[i, ] <- rep(NA_real_, ncols)
          }
        } else {
          res[i, ] <- rep(NA_real_, ncols)
        }
      }
      
      
      message("got ", sum(!is.na(res[, "into"])), ".")
      
      res_new[[ii]] <- res
      pkAreaL[[ii]] <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE]
      
    }
    
    res <-res_new
    
    
}
  
 
  res <- do.call(rbind, res)
  ## cbind the group_idx column to track the feature/peak group.
  res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"])
  ## Remove those without a signal
  res <- res[!is.na(res[, "into"]), , drop = FALSE]
  
  
  
  if (nrow(res) == 0) {
    warning("Could not integrate any signal for the missing ",
            "peaks! Consider increasing 'expandMz' and 'expandRt'.")
    return(mSet)
  }
  
  ## Get the msFeatureData:
  newFd <- mSet$msFeatureData
  
  incr <- nrow(mSet$msFeatureData$chromPeaks)
  for (i in unique(res[, "group_idx"])) {
    fdef$peakidx[[i]] <- c(fdef$peakidx[[i]],
                           (which(res[, "group_idx"] == i) + incr))
  }
  ## Define IDs for the new peaks; include fix for issue #347
  maxId <- max(as.numeric(sub("^CP", "",
                              rownames(mSet[["msFeatureData"]][["chromPeaks"]]))))
  if (maxId < 1)
    stop("chromPeaks matrix lacks rownames; please update ",
         "'object' with the 'updateObject' function.")
  toId <- maxId + nrow(res)
  
  rownames(res) <- sprintf(
    paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"),
    (maxId + 1L):toId)
  
  newFd[["chromPeaks"]] <- rbind(mSet[["msFeatureData"]][["chromPeaks"]],
                                 res[, -ncol(res)])
  
  cpd <- newFd$chromPeakData[rep(1L, nrow(res)), , drop = FALSE]
  cpd[,] <- NA
  cpd$ms_level <- as.integer(1)
  cpd$is_filled <- TRUE
  
  newFd$chromPeakData <- rbind(newFd$chromPeakData, cpd)
  rownames(newFd$chromPeakData) <- rownames(newFd$chromPeaks)
  
  mSet$msFeatureData <- newFd
  mSet$FeatureGroupTable <- fdef
  
  mSet$xcmsSet <- mSet2xcmsSet(mSet)
  
  return(mSet)
  
}

#'mSet2xcmsSet
#'@description mSet2xcmsSet
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#'
mSet2xcmsSet <- function(mSet) {
  xs <- new("xcmsSet")
  
  xs@peaks <- mSet[["msFeatureData"]][["chromPeaks"]]
  
  fgs <- mSet$FeatureGroupTable
  
  xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)])
  rownames(xs@groups) <- NULL
  xs@groupidx <- fgs$peakidx
  
  rts <- list()
  
  if (class(mSet[[2]]) == "OnDiskMSnExp"){
    format <- "onDiskData"
  } else {
    format <- "inMemoryData"
  }
  
  
  ## Ensure we're getting the raw rt
  rts$raw <- rtime(mSet[[format]])
  rts$corrected <- mSet[["msFeatureData"]][["adjustedRT"]]
  
  xs@rt <- rts
  
  ## @phenoData
  xs@phenoData <- pData(mSet[[format]])
  ## @filepaths
  xs@filepaths <- fileNames(mSet[[format]])
  
  ## @profinfo (list)
  profMethod <- NA
  profStep <- NA
  profParam <- list()
  ## If we've got any MatchedFilterParam we can take the values from there
  
  ## @mslevel <- msLevel?
  xs@mslevel <- 1
  
  ## @scanrange
  xs@scanrange <- range(scanIndex(mSet[[format]]))
  
  ## @filled ... not yet.
  if (any(mSet$msFeatureData$chromPeakData$is_filled)) {
    fld <- which(mSet$msFeatureData$chromPeakData$is_filled)
    xs@filled <- as.integer(fld)
  }
  
  return(xs)
}

#'updateRawSpectraParam
#'@description updateRawSpectraParam
#'@param Params object generated by SetPeakParams function.
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'@export
#'@ref Smith, C.A. et al. 2006. Analytical Chemistry, 78, 779-787
#' Mcgill University
#' License: GNU GPL (>= 2)
updateRawSpectraParam <- function (Params){
  
  param <- list();
  
  # 0. Method Define
  param$Peak_method <- Params$Peak_method
  param$RT_method <- Params$RT_method
  
  # 1.1 update peak picking params - centWave
  if (param$Peak_method == "centWave"){
  
    param$ppm <- as.numeric(Params[["ppm"]]);
    param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
                         as.numeric(Params[["max_peakwidth"]]));
    param$snthresh <- as.numeric(Params[["snthresh"]]);
    param$prefilter <- c(as.numeric(Params[["prefilter"]]), 
                         as.numeric(Params[["value_of_prefilter"]]));
    param$roiList <- list();
    param$firstBaselineCheck <- T;
    param$roiScales <- numeric(0);
    
    param$mzCenterFun <- Params[["mzCenterFun"]];
    param$integrate <- Params[["integrate"]];
    param$mzdiff <- as.numeric(Params[["mzdiff"]]);
    param$fitgauss <- as.logical(Params[["fitgauss"]]);
    param$noise <- as.numeric(Params[["noise"]]);
    param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
    
    param$binSize <-0.25; # density Param
  
  } else if (param$Peak_method == "matchedFilter") {
    
    param$fwhm <- as.numeric(Params$fwhm);
    param$sigma <- as.numeric(Params$sigma)
    param$steps <- as.numeric(Params$steps)
    param$snthresh <- as.numeric(Params$snthresh)
    
    param$mzdiff <- as.numeric(Params$mzdiff)
    param$bw <- as.numeric(Params$bw)
    param$max <- as.numeric(Params$max)
    
    param$impute <- "none";
    param$baseValue<- numeric(0);
    param$distance<- numeric(0);
    param$index<- F;
   
    param$binSize <- 0.1;
  }
  # 2.1 update grouping params - density
  
  param$bw<-as.numeric(Params[["bw"]]);
  param$minFraction <- as.numeric(Params[["minFraction"]]);
  param$minSamples <- as.numeric(Params[["minSamples"]]);
  param$maxFeatures <- as.numeric(Params[["maxFeatures"]]);
  
  # 3.1 update RT correction params - peakgroup
  
  param$extraPeaks <- as.numeric(Params[["extra"]]);
  
  param$smooth <- Params[["smooth"]];
  param$span <- as.numeric(Params[["span"]]);
  param$family <- Params[["family"]];
  
  param$subsetAdjust <- "average";
  
  # Finished !
  print(paste("Parameters for",param$Peak_method, "have been successfully parsed!"))
  
  return(param)
  
}

#'creatPeakTable
#'@description creatPeakTable
#'@author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'
creatPeakTable <- function(xset){
  
  if (length(xset@phenoData[["sample_name"]]) == 1) {
    return(xset@peaks)
  }
  
  groupmat <- xset@groups
  
  ts <- data.frame(cbind(groupmat,.groupval(xset, value="into")), row.names = NULL)
  
  cnames <- colnames(ts)
  
  if (cnames[1] == 'mzmed') {
    cnames[1] <- 'mz'
  } else {
    stop ('mzmed column missing')
  }
  if (cnames[4] == 'rtmed') {
    cnames[4] <- 'rt'
  } else {
    stop ('mzmed column missing')
  }
  
  colnames(ts) <- cnames
  
  return(ts)
  
}


########         -----------=========== Internal Functions From XCMS ===========-------------        #
# 
#' @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan,R., Siuzdak, G. (2006). 
#               "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear 
#                 peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.

continuousPtsAboveThreshold <- function(y, threshold, num, istart = 1) {
  if (!is.double(y)) y <- as.double(y)
  if (.C(C_continuousPtsAboveThreshold,
         y,
         as.integer(istart-1),
         length(y),
         threshold = as.double(threshold),
         num = as.integer(num),
         n = integer(1))$n > 0) TRUE else FALSE
}
getLocalNoiseEstimate <- function(d, td, ftd, noiserange, Nscantime, threshold, num) {
  
  if (length(d) < Nscantime) {
    
    ## noiserange[2] is full d-range
    drange <- which(td %in% ftd)
    n1 <- d[-drange] ## region outside the detected ROI (wide)
    n1.cp <- continuousPtsAboveThresholdIdx(n1, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n1 <- n1[!n1.cp]
    if (length(n1) > 1)  {
      baseline1 <- mean(n1)
      sdnoise1 <- sd(n1)
    } else
      baseline1 <- sdnoise1 <- 1
    
    ## noiserange[1]
    d1 <- drange[1]
    d2 <- drange[length(drange)]
    nrange2 <- c(max(1,d1 - noiserange[1]) : d1, d2 : min(length(d),d2 + noiserange[1]))
    n2 <- d[nrange2] ## region outside the detected ROI (narrow)
    n2.cp <- continuousPtsAboveThresholdIdx(n2, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n2 <- n2[!n2.cp]
    if (length(n2) > 1)  {
      baseline2 <- mean(n2)
      sdnoise2 <- sd(n2)
    } else
      baseline2 <- sdnoise2 <- 1
    
  } else {
    trimmed <- trimm(d,c(0.05,0.95))
    baseline1 <- baseline2 <- mean(trimmed)
    sdnoise1 <- sdnoise2 <- sd(trimmed)
  }
  
  c(min(baseline1,baseline2),min(sdnoise1,sdnoise2))
}
continuousPtsAboveThresholdIdx <- function(y, threshold, num, istart = 1) {
  if (!is.double(y)) y <- as.double(y)
  as.logical(.C(C_continuousPtsAboveThresholdIdx,
                y,
                as.integer(istart-1),
                length(y),
                threshold = as.double(threshold),
                num = as.integer(num),
                n = integer(length(y)))$n)
}
MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") { ## modified from package MassSpecWavelet
  if (wavelet == "mexh") {
    psi_xval <- seq(-6, 6, length = 256)
    psi <- (2/sqrt(3) * pi^(-0.25)) * (1 - psi_xval^2) *
      exp(-psi_xval^2/2)
  }
  else if (is.matrix(wavelet)) {
    if (nrow(wavelet) == 2) {
      psi_xval <- wavelet[1, ]
      psi <- wavelet[2, ]
    }
    else if (ncol(wavelet) == 2) {
      psi_xval <- wavelet[, 1]
      psi <- wavelet[, 2]
    }
    else {
      stop("Unsupported wavelet format!")
    }
  }
  else {
    stop("Unsupported wavelet!")
  }
  oldLen <- length(ms)
  ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2)
  len <- length(ms)
  nbscales <- length(scales)
  wCoefs <- NULL
  psi_xval <- psi_xval - psi_xval[1]
  dxval <- psi_xval[2]
  xmax <- psi_xval[length(psi_xval)]
  for (i in 1:length(scales)) {
    scale.i <- scales[i]
    f <- rep(0, len)
    j <- 1 + floor((0:(scale.i * xmax))/(scale.i * dxval))
    if (length(j) == 1)
      j <- c(1, 1)
    lenWave <- length(j)
    f[1:lenWave] <- rev(psi[j]) - mean(psi[j])
    if (length(f) > len)
    {i<-i-1;break;}   ##  stop(paste("scale", scale.i, "is too large!"))
    wCoefs.i <- 1/sqrt(scale.i) * convolve(ms, f)
    wCoefs.i <- c(wCoefs.i[(len - floor(lenWave/2) + 1):len],
                  wCoefs.i[1:(len - floor(lenWave/2))])
    wCoefs <- cbind(wCoefs, wCoefs.i)
  }
  if (i < 1) return(NA)
  scales <- scales[1:i]
  if (length(scales) == 1)
    wCoefs <- matrix(wCoefs, ncol = 1)
  colnames(wCoefs) <- scales
  wCoefs <- wCoefs[1:oldLen, , drop = FALSE]
  wCoefs
}
MSW.extendNBase <- function(x, nLevel=1, base=2, ...) { ## from package MassSpecWavelet
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  
  nR <- nrow(x)
  if (is.null(nLevel)) {
    nR1 <- nextn(nR, base)
  } else {
    nR1 <- ceiling(nR / base^nLevel) * base^nLevel
  }
  if (nR != nR1) {
    x <- MSW.extendLength(x, addLength=nR1-nR, ...)
  }
  x
}
MSW.extendLength <-   function(x, addLength=NULL, method=c('reflection', 'open', 'circular'), direction=c('right', 'left', 'both'))  {       ## from package MassSpecWavelet
  if (is.null(addLength)) stop('Please provide the length to be added!')
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  method <- match.arg(method)
  direction <- match.arg(direction)
  
  nR <- nrow(x)
  nR1 <- nR + addLength
  if (direction == 'both') {
    left <- right <- addLength
  } else if (direction == 'right') {
    left <- 0
    right <- addLength
  } else if (direction == 'left') {
    left <- addLength
    right <- 0
  }
  
  if (right > 0) {
    x <- switch(method,
                reflection =rbind(x, x[nR:(2 * nR - nR1 + 1), , drop=FALSE]),
                open = rbind(x, matrix(rep(x[nR,], addLength), ncol=ncol(x), byrow=TRUE)),
                circular = rbind(x, x[1:(nR1 - nR),, drop=FALSE]))
  }
  
  if (left > 0) {
    x <- switch(method,
                reflection =rbind(x[addLength:1, , drop=FALSE], x),
                open = rbind(matrix(rep(x[1,], addLength), ncol=ncol(x), byrow=TRUE), x),
                circular = rbind(x[(2 * nR - nR1 + 1):nR,, drop=FALSE], x))
  }
  if (ncol(x) == 1)  x <- as.vector(x)
  
  x
}
MSW.getLocalMaximumCWT <-  function(wCoefs, minWinSize=5, amp.Th=0)  {        ## from package MassSpecWavelet
  localMax <- NULL
  scales <- as.numeric(colnames(wCoefs))
  
  for (i in 1:length(scales)) {
    scale.i <- scales[i]
    winSize.i <- scale.i * 2 + 1
    if (winSize.i < minWinSize) {
      winSize.i <- minWinSize
    }
    temp <- MSW.localMaximum(wCoefs[,i], winSize.i)
    localMax <- cbind(localMax, temp)
  }
  ## Set the values less than peak threshold as 0
  localMax[wCoefs < amp.Th] <- 0
  colnames(localMax) <- colnames(wCoefs)
  rownames(localMax) <- rownames(wCoefs)
  localMax
}
MSW.localMaximum <-  function (x, winSize = 5)  {   ## from package MassSpecWavelet
  len <- length(x)
  rNum <- ceiling(len/winSize)
  
  ## Transform the vector as a matrix with column length equals winSize
  ##		and find the maximum position at each row.
  y <- matrix(c(x, rep(x[len], rNum * winSize - len)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  
  ## keep the result
  localMax <- rep(0, len)
  localMax[(selInd-1) * winSize + y.maxInd[selInd]] <- 1
  
  ## Shift the vector with winSize/2 and do the same operation
  shift <- floor(winSize/2)
  rNum <- ceiling((len + shift)/winSize)
  y <- matrix(c(rep(x[1], shift), x, rep(x[len], rNum * winSize - len - shift)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  localMax[(selInd-1) * winSize + y.maxInd[selInd] - shift] <- 1
  
  ## Check whether there is some local maxima have in between distance less than winSize
  maxInd <- which(localMax > 0)
  selInd <- which(diff(maxInd) < winSize)
  if (length(selInd) > 0) {
    selMaxInd1 <- maxInd[selInd]
    selMaxInd2 <- maxInd[selInd + 1]
    temp <- x[selMaxInd1] - x[selMaxInd2]
    localMax[selMaxInd1[temp <= 0]] <- 0
    localMax[selMaxInd2[temp > 0]] <- 0
  }
  
  localMax
}
MSW.getRidge <-  function(localMax, iInit=ncol(localMax), step=-1, iFinal=1, minWinSize=3, gapTh=3, skip=NULL)  {  ## modified from package MassSpecWavelet
  
  scales <- as.numeric(colnames(localMax))
  if (is.null(scales))  scales <- 1:ncol(localMax)
  
  maxInd_curr <- which(localMax[, iInit] > 0)
  nMz <- nrow(localMax)
  
  if (is.null(skip))	{
    skip <- iInit + 1
  }
  
  ## Identify all the peak pathes from the coarse level to detail levels (high column to low column)
  ## Only consider the shortest path
  if ( ncol(localMax) > 1 ) colInd <- seq(iInit+step, iFinal, step)
  else colInd <- 1
  ridgeList <- as.list(maxInd_curr)
  names(ridgeList) <- maxInd_curr
  peakStatus <- as.list(rep(0, length(maxInd_curr)))
  names(peakStatus) <- maxInd_curr
  
  ## orphanRidgeList keep the ridges disconnected at certain scale level
  ## Changed by Pan Du 05/11/06
  orphanRidgeList <- NULL
  orphanRidgeName <- NULL
  nLevel <- length(colInd)
  
  for (j in 1:nLevel) {
    col.j <- colInd[j]
    scale.j <- scales[col.j]
    
    if (colInd[j] == skip) {
      oldname <- names(ridgeList)
      ridgeList <- lapply(ridgeList, function(x) c(x, x[length(x)]))
      ##peakStatus <- lapply(peakStatus, function(x) c(x, x[length(x)]))
      names(ridgeList) <- oldname
      ##names(peakStatus) <- oldname
      next
    }
    
    if (length(maxInd_curr) == 0) {
      maxInd_curr <- which(localMax[, col.j] > 0)
      next
    }
    
    ## The slide window size is proportional to the CWT scale
    ## winSize.j <- scale.j / 2 + 1
    winSize.j <- floor(scale.j/2)
    if (winSize.j < minWinSize) {
      winSize.j <- minWinSize
    }
    
    selPeak.j <- NULL
    remove.j <- NULL
    for (k in 1:length(maxInd_curr)) {
      ind.k <- maxInd_curr[k]
      start.k <- ifelse(ind.k-winSize.j < 1, 1, ind.k-winSize.j)
      end.k <- ifelse(ind.k+winSize.j > nMz, nMz, ind.k+winSize.j)
      ind.curr <- which(localMax[start.k:end.k, col.j] > 0) + start.k - 1
      ##ind.curr <- which(localMax[, col.j] > 0)
      if (length(ind.curr) == 0) {
        status.k <- peakStatus[[as.character(ind.k)]]
        ## bug  work-around
        if (is.null(status.k)) status.k <- gapTh +1
        ##
        if (status.k > gapTh & scale.j >= 2) {
          temp <- ridgeList[[as.character(ind.k)]]
          orphanRidgeList <- c(orphanRidgeList, list(temp[1:(length(temp)-status.k)]))
          orphanRidgeName <- c(orphanRidgeName, paste(col.j + status.k + 1, ind.k, sep='_'))
          remove.j <- c(remove.j, as.character(ind.k))
          next
        } else {
          ind.curr <- ind.k
          peakStatus[[as.character(ind.k)]] <- status.k + 1
        }
      } else {
        peakStatus[[as.character(ind.k)]] <- 0
        if (length(ind.curr) >= 2)  ind.curr <- ind.curr[which.min(abs(ind.curr - ind.k))]
      }
      ridgeList[[as.character(ind.k)]] <- c(ridgeList[[as.character(ind.k)]], ind.curr)
      selPeak.j <- c(selPeak.j, ind.curr)
    }
    ## Remove the disconnected lines from the currrent list
    if (length(remove.j) > 0) {
      removeInd <- which(names(ridgeList) %in% remove.j)
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Check for duplicated selected peaks and only keep the one with the longest path.
    dupPeak.j <- unique(selPeak.j[duplicated(selPeak.j)])
    if (length(dupPeak.j) > 0) {
      removeInd <- NULL
      for (dupPeak.jk in dupPeak.j) {
        selInd <- which(selPeak.j == dupPeak.jk)
        selLen <- sapply(ridgeList[selInd], length)
        removeInd.jk <- which.max(selLen)
        removeInd <- c(removeInd, selInd[-removeInd.jk])
        orphanRidgeList <- c(orphanRidgeList, ridgeList[removeInd.jk])
        orphanRidgeName <- c(orphanRidgeName, paste(col.j, selPeak.j[removeInd.jk], sep='_'))
      }
      selPeak.j <- selPeak.j[-removeInd]
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Update the names of the ridgeList as the new selected peaks
    ##if (scale.j >= 2) {
    if (length(ridgeList) > 0) names(ridgeList) <- selPeak.j
    if (length(peakStatus) > 0) names(peakStatus) <- selPeak.j
    ##}
    
    ## If the level is larger than 3, expand the peak list by including other unselected peaks at that level
    if (scale.j >= 2) {
      maxInd_next <- which(localMax[, col.j] > 0)
      unSelPeak.j <- maxInd_next[!(maxInd_next %in% selPeak.j)]
      newPeak.j <- as.list(unSelPeak.j)
      names(newPeak.j) <- unSelPeak.j
      ## Update ridgeList
      ridgeList <- c(ridgeList, newPeak.j)
      maxInd_curr <- c(selPeak.j, unSelPeak.j)
      ## Update peakStatus
      newPeakStatus <- as.list(rep(0, length(newPeak.j)))
      names(newPeakStatus) <- newPeak.j
      peakStatus <- c(peakStatus, newPeakStatus)
    } else {
      maxInd_curr <- selPeak.j
    }
  }
  
  ## Attach the peak level at the beginning of the ridge names
  if (length(ridgeList) > 0) names(ridgeList) <- paste(1, names(ridgeList), sep='_')
  if (length(orphanRidgeList) > 0) names(orphanRidgeList) <- orphanRidgeName
  ## Combine ridgeList and orphanRidgeList
  ridgeList <- c(ridgeList, orphanRidgeList)
  if (length(ridgeList) == 0) return(NULL)
  
  ## Reverse the order as from the low level to high level.
  ridgeList <- lapply(ridgeList, rev)
  ## order the ridgeList in increasing order
  ##ord <- order(selPeak.j)
  ##ridgeList <- ridgeList[ord]
  
  ## Remove possible duplicated ridges
  ridgeList <- ridgeList[!duplicated(names(ridgeList))]
  
  attr(ridgeList, 'class') <- 'ridgeList'
  attr(ridgeList, 'scales') <- scales
  return(ridgeList)
}
descendMin <- function(y, istart = which.max(y)) {
  
  if (!is.double(y)) y <- as.double(y)
  unlist(.C(C_DescendMin,
            y,
            length(y),
            as.integer(istart-1),
            ilower = integer(1),
            iupper = integer(1))[4:5]) + 1
}
descendMinTol <- function(d,startpos,maxDescOutlier) {
  l <- startpos[1]; r <- startpos[2]; outl <- 0; N <- length(d)
  ## left
  while ((l > 1) && (d[l] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- l
    if (d[l-1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- l
    l <- l -1
  }
  if (outl > 0) l <- l + outl
  ## right
  outl <- 0;
  while ((r < N) && (d[r] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- r
    if (d[r+1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- r
    r <- r + 1
  }
  if (outl > 0) r <- r - outl
  c(l,r)
}
joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range, peaks, maxGaussOverlap=0.5) {
  ## Fix issue #284: avoid having identical peaks multiple times in this
  ## matrix.
  peaks <- unique(peaks)
  gausspeaksidx <- which(!is.na(peaks[,"mu"]))
  Ngp <- length(gausspeaksidx)
  if (Ngp == 0)
    return(peaks)
  
  newpeaks <- NULL
  
  gpeaks <- peaks[gausspeaksidx, , drop = FALSE]
  if (nrow(peaks) - Ngp > 0)
    notgausspeaks <- peaks[-gausspeaksidx, , drop = FALSE]
  
  if (Ngp > 1) {
    comb <- which(upper.tri(matrix(0, Ngp, Ngp)), arr.ind = TRUE)
    overlap <- logical(nrow(comb))
    overlap <- rep(FALSE, dim(comb)[1])
    for (k in seq_len(nrow(comb))) {
      p1 <- comb[k, 1]
      p2 <- comb[k, 2]
      overlap[k] <- gaussCoverage(xlim = scan.range,
                                  h1 = gpeaks[p1, "h"],
                                  mu1 = gpeaks[p1, "mu"],
                                  s1 = gpeaks[p1, "sigma"],
                                  h2 = gpeaks[p2, "h"],
                                  mu2 = gpeaks[p2, "mu"],
                                  s2 = gpeaks[p2, "sigma"]) >=
        maxGaussOverlap
    }
  } else overlap <- FALSE
  
  if (any(overlap) && (Ngp > 1)) {
    jlist <- list()
    if (length(which(overlap)) > 1) {
      gm <- comb[overlap, ]
      ## create list of connected components
      cc <- list()
      cc[[1]] <- gm[1,] ## copy first entry
      for (j in 2:dim(gm)[1]) { ## search for connections
        ccl <- unlist(cc)
        nl <- sapply(cc, function(x) length(x))
        ccidx <- rep(1:length(nl),nl)
        idx <- match(gm[j,],ccl)
        if (any(!is.na(idx))) { ## connection found, add to list
          pos <- ccidx[idx[which(!is.na(idx))[1]]]
          cc[[pos]] <- c(cc[[pos]],gm[j,])
        } else  ## create new list element
          cc[[length(cc) + 1]] <- gm[j,]
        
      }
      ccn <- list()
      lcc <- length(cc)
      ins <- rep(FALSE,lcc)
      if (lcc > 1) {
        jcomb <- which(upper.tri(matrix(0,lcc,lcc)),arr.ind = TRUE)
        for (j in 1:dim(jcomb)[1]) {
          j1 <- jcomb[j,1]; j2 <- jcomb[j,2]
          if (any(cc[[j1]] %in% cc[[j2]]))
            ccn[[length(ccn) +1]] <- unique(c(cc[[j1]],cc[[j2]]))
          else {
            if (!ins[j1]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j1]])
              ins[j1] <- TRUE
            }
            if (!ins[j2]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j2]])
              ins[j2] <- TRUE
            }
          }
        }
      } else ccn <- cc;
      
      size <- sapply(ccn, function(x) length(x))
      s2idx <- which(size >= 2)
      
      if (length(s2idx) > 0) {
        for (j in 1:length(s2idx)) {
          pgroup <- unique(ccn[[ s2idx[j] ]])
          jlist[[j]] <- pgroup
        }
      } else stop('(length(s2idx) = 0) ?!?')
    } else jlist[[1]] <- comb[overlap, ]
    
    ## join all peaks belonging to one cc
    for (j in seq_along(jlist)) {
      jidx <- jlist[[j]]
      newpeak <- gpeaks[jidx[1], , drop = FALSE]
      newmin <- min(gpeaks[jidx, "lmin"])
      newmax <- max(gpeaks[jidx, "lmax"])
      newpeak[1, "scpos"] <- -1 ## not defined after join
      newpeak[1, "scmin"] <- -1 ##    ..
      newpeak[1, "scmax"] <- -1 ##    ..
      newpeak[1, "scale"] <- -1 ##    ..
      
      newpeak[1, "maxo"] <- max(gpeaks[jidx, "maxo"])
      newpeak[1, "sn"]   <- max(gpeaks[jidx, "sn"])
      newpeak[1, "lmin"] <- newmin
      newpeak[1, "lmax"] <- newmax
      newpeak[1, "rtmin"] <- scantime[td[newmin]]
      newpeak[1, "rtmax"] <- scantime[td[newmax]]
      newpeak[1,"rt"] <- weighted.mean(gpeaks[jidx, "rt"],
                                       w = gpeaks[jidx, "maxo"])
      
      ## Re-assign m/z values
      p1 <- match(td[newmin], otd)[1]
      p2 <- match(td[newmax], otd)
      p2 <- p2[length(p2)]
      if (is.na(p1)) p1 <- 1
      if (is.na(p2)) p2 <- length(omz)
      mz.value <- omz[p1:p2]
      mz.int <- od[p1:p2]
      
      ## re-calculate m/z value for peak range
      #mzmean <- do.call(mzCenterFun, list(mz = mz.value,
      #                                    intensity = mz.int))
      mzmean <- weighted.mean(mz.value, mz.int)
      mzrange <- range(mz.value)
      newpeak[1, "mz"] <- mzmean
      newpeak[1, c("mzmin","mzmax")] <- mzrange
      
      ## re-fit gaussian
      md <- max(d[newmin:newmax])
      d1 <- d[newmin:newmax] / md
      pgauss <- fitGauss(td[newmin:newmax],
                         d[newmin:newmax],
                         pgauss = list(mu = td[newmin] +
                                         (td[newmax] - td[newmin])/2,
                                       sigma = td[newmax] - td[newmin],
                                       h = max(gpeaks[jidx, "h"])))
      if (!any(is.na(pgauss)) && all(pgauss > 0)) {
        newpeak[1, "mu"]    <- pgauss$mu
        newpeak[1, "sigma"] <- pgauss$sigma
        newpeak[1, "h"]     <- pgauss$h
        newpeak[1, "egauss"]<- sqrt((1/length(td[newmin:newmax])) *
                                      sum(((d1 - gauss(td[newmin:newmax],
                                                       pgauss$h/md,
                                                       pgauss$mu,
                                                       pgauss$sigma))^2)))
      } else { ## re-fit after join failed
        newpeak[1, "mu"]       <- NA
        newpeak[1, "sigma"]    <- NA
        newpeak[1, "h"]        <- NA
        newpeak[1, "egauss"]   <- NA
      }
      
      newpeaks <- rbind(newpeaks, newpeak)
    }
    ## add the remaining peaks
    jp <- unique(unlist(jlist))
    if (dim(peaks)[1] - length(jp) > 0)
      newpeaks <- rbind(newpeaks, gpeaks[-jp, ])
    
  } else
    newpeaks <- gpeaks
  
  grt.min <- newpeaks[, "rtmin"]
  grt.max <- newpeaks[, "rtmax"]
  
  if (nrow(peaks) - Ngp > 0) { ## notgausspeaks
    for (k in 1:nrow(notgausspeaks)) {
      ## here we can only check if they are completely overlapped
      ## by other peaks
      if (!any((notgausspeaks[k, "rtmin"] >= grt.min) &
               (notgausspeaks[k,"rtmax"] <= grt.max)))
        newpeaks <- rbind(newpeaks,notgausspeaks[k,])
    }
  }
  
  rownames(newpeaks) <- NULL
  newpeaks
}
trimm <- function(x, trim=c(0.05,0.95)) {
  a <- sort(x[x>0])
  Na <- length(a)
  quant <- round((Na*trim[1])+1):round(Na*trim[2])
  a[quant]
}
findEqualGreaterM <- function(x, values) {
  
  if (!is.double(x)) x <- as.double(x)
  if (!is.double(values)) values <- as.double(values)
  .C(C_FindEqualGreaterM,
     x,
     length(x),
     values,
     length(values),
     index = integer(length(values)))$index + 1
} 
rectUnique <- function(m, order = seq(length = nrow(m)), xdiff = 0, ydiff = 0) {
  
  nr <- nrow(m)
  nc <- ncol(m)
  if (!is.double(m))
    m <- as.double(m)
  .C(C_RectUnique,
     m,
     as.integer(order-1),
     nr,
     nc,
     as.double(xdiff),
     as.double(ydiff),
     logical(nrow(m)),
     DUP = FALSE)[[7]]
}
na.flatfill <- function(x) {
  
  realloc <- which(!is.na(x))
  if (realloc[1] > 1)
    x[1:(realloc[1]-1)] <- x[realloc[1]]
  if (realloc[length(realloc)] < length(x))
    x[(realloc[length(realloc)]+1):length(x)] <- x[realloc[length(realloc)]]
  x
}
SSgauss <- selfStart(~ h*exp(-(x-mu)^2/(2*sigma^2)), function(mCall, data, LHS) {
  
  xy <- sortedXyData(mCall[["x"]], LHS, data)
  
  len <- dim(xy)[1]
  xyarea <- sum((xy[2:len,2]+xy[1:(len-1),2])*(xy[2:len,1]-xy[1:(len-1),1]))/2
  maxpos <- which.max(xy[,2])
  
  mu <- xy[maxpos,1]
  h <- xy[maxpos,2]
  sigma <- xyarea/(h*sqrt(2*pi))
  
  value <- c(mu, sigma, h)
  names(value) <- mCall[c("mu", "sigma", "h")]
  value
  
}, c("mu", "sigma", "h"))
.narrow_rt_boundaries <- function(lm, d, thresh = 1) {
  lm_seq <- lm[1]:lm[2]
  above_thresh <- d[lm_seq] >= thresh
  if (any(above_thresh)) {
    ## Expand by one on each side to be consistent with old code.
    above_thresh <- above_thresh | c(above_thresh[-1], FALSE) |
      c(FALSE, above_thresh[-length(above_thresh)])
    lm <- range(lm_seq[above_thresh], na.rm = TRUE)
  }
  lm
}
.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), rtrange = numeric(), scanrange = numeric(), log = FALSE) {
  if (length(rtrange) >= 2) {
    rtrange <- range(rtrange)
    ## Fix for issue #267. rtrange outside scanrange causes scanrange
    ## being c(Inf, -Inf)
    scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2]))
    if (!length(scns))
      return(matrix(
        nrow = 0, ncol = 3,
        dimnames = list(character(), c("time", "mz", "intensity"))))
    scanrange <- range(scns)
  }
  if (length(scanrange) < 2)
    scanrange <- c(1, length(valsPerSpect))
  else scanrange <- range(scanrange)
  if (!all(is.finite(scanrange)))
    stop("'scanrange' does not contain finite values")
  if (!all(is.finite(mzrange)))
    stop("'mzrange' does not contain finite values")
  if (!all(is.finite(rtrange)))
    stop("'rtrange' does not contain finite values")
  if (scanrange[1] == 1)
    startidx <- 1
  else
    startidx <- sum(valsPerSpect[1:(scanrange[1] - 1)]) + 1
  endidx <- sum(valsPerSpect[1:scanrange[2]])
  scans <- rep(scanrange[1]:scanrange[2],
               valsPerSpect[scanrange[1]:scanrange[2]])
  masses <- mz[startidx:endidx]
  massidx <- 1:length(masses)
  if (length(mzrange) >= 2) {
    mzrange <- range(mzrange)
    massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))]
  }
  int <- int[startidx:endidx][massidx]
  if (log && (length(int) > 0))
    int <- log(int + max(1 - min(int), 0))
  cbind(time = scantime[scans[massidx]],
        mz = masses[massidx],
        intensity = int)
}
.getPeakGroupsRtMatrix <- function(peaks, peakIndex, sampleIndex, missingSample, extraPeaks) {
  ## For each feature:
  ## o extract the retention time of the peak with the highest intensity.
  ## o skip peak groups if they are not assigned a peak in at least a
  ##   minimum number of samples OR if have too many peaks from the same
  ##   sample assigned to it.
  nSamples <- length(sampleIndex)
  rt <- lapply(peakIndex, function(z) {
    cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE]
    ## Return NULL if we've got less samples that required or is the total
    ## number of peaks is larger than a certain threshold.
    ## Note that the original implementation is not completely correct!
    ## nsamp > nsamp + extraPeaks might be correct.
    nsamp <- length(unique(cur_fts[, "sample"]))
    if (nsamp < (nSamples - missingSample) |
        nrow(cur_fts) > (nsamp + extraPeaks))
      return(NULL)
    cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), ]
    cur_fts[match(sampleIndex, cur_fts[, 3]), 1]
  })
  rt <- do.call(rbind, rt)
  ## Order them by median retention time. NOTE: this is different from the
  ## original code, in which the peak groups are ordered by the median
  ## retention time that is calculated over ALL peaks within the peak
  ## group, not only to one peak selected for each sample (for multi
  ## peak per sample assignments).
  ## Fix for issue #175
  if (is(rt, "matrix")) {
    rt <- rt[order(Biobase::rowMedians(rt, na.rm = TRUE)), , drop = FALSE]
  }
  rt
}
.peakIndex <- function(object) {
  if (inherits(object, "DataFrame")) {
    idxs <- object$peakidx
    names(idxs) <- rownames(object)
  } else {
    if (is.null(object[["FeatureGroupTable"]]))
      stop("No feature definitions present. Please run groupChromPeaks first.")
    idxs <- object[["FeatureGroupTable"]]@listData[["peakidx"]]
    names(idxs) <- rownames(object[["FeatureGroupTable"]])
  }
  idxs
}
.applyRtAdjToChromPeaks <- function(x, rtraw, rtadj) {
  ## Using a for loop here.
  for (i in 1:length(rtraw)) {
    whichSample <- which(x[, "sample"] == i)
    if (length(whichSample)) {
      x[whichSample, c("rt", "rtmin", "rtmax")] <-
        .applyRtAdjustment(x[whichSample, c("rt", "rtmin", "rtmax")],
                           rtraw = rtraw[[i]], rtadj = rtadj[[i]])
    }
  }
  x
}
.applyRtAdjustment <- function(x, rtraw, rtadj) {
  ## re-order everything if rtraw is not sorted; issue #146
  if (is.unsorted(rtraw)) {
    idx <- order(rtraw)
    rtraw <- rtraw[idx]
    rtadj <- rtadj[idx]
  }
  adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
  res <- adjFun(x)
  ## Fix margins.
  idx_low <- which(x < rtraw[1])
  if (length(idx_low)) {
    first_adj <- idx_low[length(idx_low)] + 1
    res[idx_low] <- x[idx_low] + res[first_adj] - x[first_adj]
  }
  idx_high <- which(x > rtraw[length(rtraw)])
  if (length(idx_high)) {
    last_adj <- idx_high[1] - 1
    res[idx_high] <- x[idx_high] + res[last_adj] - x[last_adj]
  }
  if (is.null(dim(res)))
    names(res) <- names(x)
  res
}
.getChromPeakData <- function(object, peakArea, sample_idx,mzCenterFun = "weighted.mean",cn = c("mz", "rt", "into", "maxo", "sample")) {
  
  ncols <- length(cn)
  res <- matrix(ncol = ncols, nrow = nrow(peakArea))
  colnames(res) <- cn
  res[, "sample"] <- sample_idx
  res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")]
  ## Load the data
  message("Requesting ", nrow(res), " peaks from ",
          basename(MSnbase::fileNames(object)), " ... ", appendLF = FALSE)
  
  spctr <- MSnbase::spectra(object, BPPARAM = SerialParam())
  
  
  
  mzs <- lapply(spctr, mz)
  valsPerSpect <- lengths(mzs)
  ints <- unlist(lapply(spctr, intensity), use.names = FALSE)
  rm(spctr)
  
  mzs <- unlist(mzs, use.names = FALSE)
  mzs_range <- range(mzs)
  rtim <- rtime(object)
  rtim_range <- range(rtim)
  
  for (i in seq_len(nrow(res))) {
    rtr <- peakArea[i, c("rtmin", "rtmax")]
    mzr <- peakArea[i, c("mzmin", "mzmax")]
    ## If the rt range is completely out; additional fix for #267
    if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
        mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
      res[i, ] <- rep(NA_real_, ncols)
      next
    }
    ## Ensure that the mz and rt region is within the range of the data.
    rtr[1] <- max(rtr[1], rtim_range[1])
    rtr[2] <- min(rtr[2], rtim_range[2])
    mzr[1] <- max(mzr[1], mzs_range[1])
    mzr[2] <- min(mzr[2], mzs_range[2])
    mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
                                    valsPerSpect = valsPerSpect, rtrange = rtr,
                                    mzrange = mzr)
    if (length(mtx)) {
      if (any(!is.na(mtx[, 3]))) {
        ## How to calculate the area: (1)sum of all intensities / (2)by
        ## the number of data points (REAL ones, considering also NAs)
        ## and multiplied with the (3)rt width.
        ## (1) sum(mtx[, 3], na.rm = TRUE)
        ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used
        ## nrow(mtx) here, which would correspond to the non-NA
        ## intensities within the rt range we don't get the same results
        ## as e.g. centWave. Using max(1, ... to avoid getting Inf in
        ## case the signal is based on a single data point.
        ## (3) rtr[2] - rtr[1]
        res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
          ((rtr[2] - rtr[1]) /
             max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
        maxi <- which.max(mtx[, 3])
        res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
        res[i, c("rtmin", "rtmax")] <- rtr
        ## Calculate the intensity weighted mean mz
        meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
        if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
        res[i, "mz"] <- meanMz
      } else {
        res[i, ] <- rep(NA_real_, ncols)
      }
    } else {
      res[i, ] <- rep(NA_real_, ncols)
    }
  }
  message("got ", sum(!is.na(res[, "into"])), ".")
  return(res)
}
.feature_values <- function(pks, fts, method, value = "into",intensity = "into", colnames, missing = NA) {
  ftIdx <- fts$peakidx
  ## Match columns
  idx_rt <- match("rt", colnames(pks))
  idx_int <- match(intensity, colnames(pks))
  idx_samp <- match("sample", colnames(pks))
  vals <- matrix(nrow = length(ftIdx), ncol = length(colnames))
  nSamples <- seq_along(colnames)
  if (method == "sum") {
    for (i in seq_along(ftIdx)) {
      cur_pks <- pks[ftIdx[[i]], c(value, "sample")]
      int_sum <- split(cur_pks[, value], cur_pks[, "sample"])
      vals[i, as.numeric(names(int_sum))] <-
        unlist(lapply(int_sum, base::sum), use.names = FALSE)
    }
  } else {
    if (method == "medret") {
      medret <- fts$rtmed
      for (i in seq_along(ftIdx)) {
        gidx <- ftIdx[[i]][
          base::order(base::abs(pks[ftIdx[[i]],
                                    idx_rt] - medret[i]))]
        vals[i, ] <- gidx[
          base::match(nSamples, pks[gidx, idx_samp])]
      }
    }
    if (method == "maxint") {
      for (i in seq_along(ftIdx)) {
        gidx <- ftIdx[[i]][
          base::order(pks[ftIdx[[i]], idx_int],
                      decreasing = TRUE)]
        vals[i, ] <- gidx[base::match(nSamples,
                                      pks[gidx, idx_samp])]
      }
    }
    if (value != "index") {
      if (!any(colnames(pks) == value))
        stop("Column '", value, "' not present in the ",
             "chromatographic peaks matrix!")
      vals <- pks[vals, value]
      dim(vals) <- c(length(ftIdx), length(nSamples))
    }
  }
  if (value != "index") {
    if (is.numeric(missing)) {
      vals[is.na(vals)] <- missing
    }
    if (!is.na(missing) & missing == "rowmin_half") {
      for (i in seq_len(nrow(vals))) {
        nas <- is.na(vals[i, ])
        if (any(nas))
          vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2
      }
    }
  }
  colnames(vals) <- colnames
  rownames(vals) <- rownames(fts)
  vals
}
.groupval <-function(xset, method = c("medret", "maxint"),value = "into", intensity = "into"){
  
  if ( nrow(xset@groups)<1 || length(xset@groupidx) <1) {
    stop("xcmsSet is not been grouped.")
  }
  
  method <- match.arg(method)
  
  peakmat <- xset@peaks
  groupmat <- xset@groups
  groupindex <- xset@groupidx
  
  sampnum <- seq(length = length(rownames(xset@phenoData)))
  retcol <- match("rt", colnames(peakmat))
  intcol <- match(intensity, colnames(peakmat))
  sampcol <- match("sample", colnames(peakmat))
  
  values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
  
  if (method == "medret") {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - median(peakmat[groupindex[[i]],retcol])))]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  } else {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  }
  
  if (value != "index") {
    values <- peakmat[values,value]
    dim(values) <- c(length(groupindex), length(sampnum))
  }
  colnames(values) <- rownames(xset@phenoData)
  rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
  
  values
}


### ---- ===== MatchedFilter Sub Kit ==== ---- #####
binYonX <- function(x, y, breaks, nBins, binSize, binFromX,
                    binToX, fromIdx = 1L, toIdx = length(x),
                    method = "max", baseValue,
                    sortedX = !is.unsorted(x),
                    shiftByHalfBinSize = FALSE,
                    returnIndex = FALSE, returnX = TRUE) {
  if (!missing(x) & missing(y))
    y <- x
  if (missing(x) | missing(y))
    stop("Arguments 'x' and 'y' are mandatory!")
  if (missing(nBins) & missing(binSize) & missing(breaks))
    stop("One of 'breaks', 'nBins' or 'binSize' has to be defined!")
  if (!sortedX) {
    message("'x' is not sorted, will sort 'x' and 'y'.")
    ## Sort method; see issue #180 for MSnbase
    ## Note: order method = "radix" is considerably faster - but there is no
    ## method argument for older R versions.
    o <- order(x)
    x <- x[o]
    y <- y[o]
  }
  ## Check fromIdx and toIdx
  if (any(fromIdx < 1) | any(toIdx > length(x)))
    stop("'fromIdx' and 'toIdx' have to be within 1 and lenght(x)!")
  if (length(toIdx) != length(fromIdx))
    stop("'fromIdx' and 'toIdx' have to have the same length!")
  if (missing(binFromX))
    binFromX <- as.double(NA)
  if (missing(binToX))
    binToX <- as.double(NA)
  ## For now we don't allow NAs in x
  if (anyNA(x))
    stop("No 'NA' values are allowed in 'x'!")
  ## Check that input values have the correct data types.
  if (!is.double(x)) x <- as.double(x)
  if (!is.double(y)) y <- as.double(y)
  if (!is.double(binFromX)) binFromX <- as.double(binFromX)
  if (!is.double(binToX)) binToX <- as.double(binToX)
  if (!is.integer(fromIdx)) fromIdx <- as.integer(fromIdx)
  if (!is.integer(toIdx)) toIdx <- as.integer(toIdx)
  ## breaks has precedence over nBins over binSize.
  shiftIt <- 0L
  if (!missing(breaks)) {
    if (shiftByHalfBinSize)
      warning("Argument 'shiftByHalfBinSize' is ignored if 'breaks'",
              " are provided.")
    if (!is.double(breaks)) breaks <- as.double(nBins)
    nBins <- NA_integer_
    binSize <- as.double(NA)
  } else {
    if (!missing(nBins)) {
      breaks <- as.double(NA)
      if (!is.integer(nBins)) nBins <- as.integer(nBins)
      binSize <- as.double(NA)
    } else{
      breaks <- as.double(NA)
      nBins <- NA_integer_
      if (!is.double(binSize)) binSize <- as.double(binSize)
    }
  }
  if (shiftByHalfBinSize)
    shiftIt <- 1L
  ## Define default value for baseValue
  if (missing(baseValue)) {
    baseValue = as.double(NA)
  } else {
    if (!is.double(baseValue)) baseValue <- as.double(baseValue)
  }
  
  getIndex <- 0L
  if (returnIndex)
    getIndex <- 1L
  getX <- 0L
  if (returnX)
    getX <- 1L
  if (length(toIdx) > 1) {
    .Call(C_binYonX_multi, x, y, breaks, nBins, binSize,
          binFromX, binToX, force(fromIdx - 1L), force(toIdx - 1L),
          shiftIt,
          as.integer(.aggregateMethod2int(method)),
          baseValue,
          getIndex,
          getX,
          PACKAGE = "MetaboAnalystR")
  } else {
    .Call(C_binYonX, x, y, breaks, nBins, binSize,
          binFromX, binToX, force(fromIdx - 1L), force(toIdx - 1L),
          shiftIt,
          as.integer(.aggregateMethod2int(method)),
          baseValue,
          getIndex,
          getX,
          PACKAGE = "MetaboAnalystR")
  }
}

.aggregateMethod2int <- function(method = "max") {
  .aggregateMethods <- c(1, 2, 3, 4);
  names(.aggregateMethods) <- c("max", "min", "sum", "mean")
  method <- match.arg(method, names(.aggregateMethods))
  return(.aggregateMethods[method])
}
imputeLinInterpol <- function(x, baseValue, method = "lin", distance = 1L,
                              noInterpolAtEnds = FALSE) {
  method <- match.arg(method, c("none", "lin", "linbase")) ## interDist? distance = 2
  if (method == "none") {
    return(x)
  }
  if (!is.double(x)) x <- as.double(x)
  if (method == "lin") {
    noInter <- 0L
    if (noInterpolAtEnds)
      noInter <- 1L
    return(.Call(C_impute_with_linear_interpolation, x, noInter,
                 PACKAGE = "MetaboAnalystR"))
  }
  if (method == "linbase") {
    if (missing(baseValue))
      baseValue <- min(x, na.rm = TRUE) / 2
    if (!is.double(baseValue)) baseValue <- as.double(baseValue)
    if (!is.integer(distance)) distance <- as.integer(distance)
    return(.Call(C_impute_with_linear_interpolation_base, x, baseValue,
                 distance, PACKAGE = "MetaboAnalystR"))
  }
}
colMax <- function (x, na.rm = FALSE, dims = 1) {
  
  if (is.data.frame(x))
    x <- as.matrix(x)
  if (!is.array(x) || length(dn <- dim(x)) < 2)
    stop("`x' must be an array of at least two dimensions")
  if (dims < 1 || dims > length(dn) - 1)
    stop("invalid `dims'")
  n <- prod(dn[1:dims])
  dn <- dn[-(1:dims)]
  if (!is.double(x)) x <- as.double(x)
  z <- .C(C_ColMax,
          x,
          as.integer(n),
          as.integer(prod(dn)),
          double(prod(dn)),
          PACKAGE = "MetaboAnalystR")[[4]]
  if (length(dn) > 1) {
    dim(z) <- dn
    dimnames(z) <- dimnames(x)[-(1:dims)]
  }
  else names(z) <- dimnames(x)[[dims + 1]]
  z
}
filtfft <- function(y, filt) {
  
  yfilt <- numeric(length(filt))
  yfilt[1:length(y)] <- y
  yfilt <- fft(fft(yfilt, inverse = TRUE) * filt)
  
  Re(yfilt[1:length(y)])
}
descendZero <- function(y, istart = which.max(y)) {
  
  if (!is.double(y)) y <- as.double(y)
  unlist(.C(C_DescendZero,
            y,
            length(y),
            as.integer(istart-1),
            ilower = integer(1),
            iupper = integer(1),
            PACKAGE = "MetaboAnalystR")[4:5]) + 1
}
which.colMax <- function (x, na.rm = FALSE, dims = 1) {
  
  if (is.data.frame(x))
    x <- as.matrix(x)
  if (!is.array(x) || length(dn <- dim(x)) < 2)
    stop("`x' must be an array of at least two dimensions")
  if (dims < 1 || dims > length(dn) - 1)
    stop("invalid `dims'")
  n <- prod(dn[1:dims])
  dn <- dn[-(1:dims)]
  if (!is.double(x)) x <- as.double(x)
  z <- .C(C_WhichColMax,
          x,
          as.integer(n),
          as.integer(prod(dn)),
          integer(prod(dn)),
          PACKAGE = "MetaboAnalystR")[[4]]
  if (length(dn) > 1) {
    dim(z) <- dn
    dimnames(z) <- dimnames(x)[-(1:dims)]
  }
  else names(z) <- dimnames(x)[[dims + 1]]
  z
}
breaks_on_nBins <- function(fromX, toX, nBins, shiftByHalfBinSize = FALSE) {
  if(missing(fromX) | missing(toX) | missing(nBins))
    stop("'fromX', 'toX' and 'nBins' are required!")
  if (!is.double(fromX)) fromX <- as.double(fromX)
  if (!is.double(toX)) toX <- as.double(toX)
  if (!is.integer(nBins)) nBins <- as.integer(nBins)
  shiftIt <- 0L
  if (shiftByHalfBinSize)
    shiftIt <- 1L
  return(.Call(C_breaks_on_nBins, fromX, toX, nBins, shiftIt,
               PACKAGE = "MetaboAnalystR"))
}

### ---- ==== Botthom of this sub Kit ==== --- ####



# 
#########         --------------======== Bottom of this Function Kit =============--------          






# ---------------------------2. Peaks Picking_Section-------------------------------

### Functions_Peak Peaking _ used for parameters optimization
#' @title Data Preparation for ChromPeaking Finding
#' @param object MSnExp object.
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_prep <-function(object){
  
  ## Check if the data is centroided
  centroided <- all(centroided(object))
  if (is.na(centroided)) {
    suppressWarnings(
      centroided <- isCentroided(
        object[[ceiling(length(object) / 3)]])
    )
  }
  if (is.na(centroided) || !centroided)
    warning("Your data appears to be not centroided! CentWave",
            " works best on data in centroid mode.")
  
  
  ##### Revise for Centwave MSnExp -------------
  
  ## (1) split the object per file from MSnExp.
  object_mslevel_i<-splitByFile(object = object, f = factor(c(1:length(object@phenoData@data[["sample_name"]]))))
  object_mslevel_o<-bplapply(object_mslevel_i, FUN = function(x){x@assayData})
  message("Data Spliting Finished !")
  
  ## (2) use parallel to do the peak detection.
  message("Peak Preparing Begin...")
  object_mslevel_name<-bplapply(object_mslevel_o,ls);
  object_mslevell<-object_mslevel<-list();
  for (j in 1:length(object_mslevel_o)){
    for (i in 1:length(object_mslevel_name[[j]])){
      #print(i,j,"\n")
      object_mslevell[[i]]<-object_mslevel_o[[j]][[object_mslevel_name[[j]][i]]]
    }
    names(object_mslevell)<-object_mslevel_name[[j]];
    object_mslevel[[j]]<-object_mslevell;
  }
  
  for (i in 1:length(object_mslevel)){
    #for (j in 1:length(object_mslevel[[i]])){
    ncount<-as.numeric(which(sapply(names(object_mslevel[[i]]),is.na)));
    if (!identical(ncount,numeric(0))){
      object_mslevel[[i]]<-object_mslevel[[i]][-ncount]
    }
    #}
  }
  message("Peak Preparing Done !")
  return(object_mslevel)
  
  
}

#' @title Calculate PPS method
#' @description Peak picking method. Specifically used for parameters optimization
#' @param xset MSnExp object.
#' @param object_mslevel List, prepared by findChromPeaks_prep function.
#' @param param Parameters list.
#' @param BPPARAM Parallel Method.
#' @param msLevel msLevel. Only 1 is supported currently.
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_core <-function(object,object_mslevel,param, BPPARAM = bpparam(), msLevel = 1L){
  
 
  
  ## Restrict to MS 1 data for now.
  if (length(msLevel) > 1)
    stop("Currently only peak detection in a single MS level is ",
         "supported", call. = FALSE)
  
  if (is.null(param$fwhm)){
    
    resList <- bplapply(object_mslevel,
                      FUN = PeakPicking_centWave_slave,
                      param = param,
                      BPPARAM = BPPARAM)
  
  } else {
    
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_MatchedFilter_slave,
                        param = param,
                        BPPARAM = BPPARAM)
    
  }
  
  print("Peak Profiling Finished !")
  
  rm(object_mslevel)
  ## (3) PEAK SUMMARY------------
  
  pks <- vector("list", length(resList))
  for (i in 1:length(resList)) {
    n_pks <- nrow(resList[[i]])
    if (is.null(n_pks))
      n_pks <- 0
    if (n_pks == 0) {
      pks[[i]] <- NULL
      warning("No peaks found in sample number ", i, ".")
    } else {
      pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
    }
    
  } 
  pks <- do.call(rbind, pks)
  
  # Make the chrompeaks embeded
  newFd <- list()
  #newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
  rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
                           seq(from = 1, length.out = nrow(pks)))
  
  newFd$chromPeaks <- pks
  newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
                                              row.names = rownames(pks))
  
  ## mSet Generation
  mSet<-list()
  mSet$msFeatureData <- newFd
  mSet$inMemoryData <- object
  
  return(mSet)

}



# -------------------------------3. MS2 Data_Section---------------------------------


#' Extract MS2 Data
#' @description This function returns a list of spectra that matches with 
#' a user specified precursor m/z. 
#' @param filename Name of the file (e.g. mzML, mzXML)
#' @param peakParams Object containing parameters for peak picking.
#' @param mzmin Minimum m/z when selecting a precursor from peak list 
#' @param mzmax Maximum m/z when selecting a precursor from peak list 
#' @author Jasmine Chong \email{jasmine.chong@mail.mcgill.ca},
#' Mai Yamamoto \email{yamamoto.mai@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)


ExtractMS2data <- function(filename, peakParams, mzmin, mzmax){
  
  ## Function to extract MSMS spectra 
  xrms <- readMSData(filename, mode = "onDisk", centroided = TRUE)
  # get all MS2 spectra from DDA experiment
  ms2spectra <- spectra(filterMsLevel(xrms, msLevel = 2))  
  
  ## Detect chromatographic peaks 
  # set parameters and find chromatographic peaks 
  ms1cwp <- CentWaveParam(snthresh = peakParams$sn_thresh, 
                          noise = 100, ppm = peakParams$ppm, peakwidth = c(peakParams$min_pkw, peakParams$max_pkw))
  ms1data <- findChromPeaks(xrms, param = ms1cwp, msLevel = 1)
  
  #get all peaks 
  chromPeaks <- chromPeaks(ms1data)
  
  # pick one compound 
  ## find which row contains the mass we want
  chp <- as.data.frame(chromPeaks)
  rownum <- which(chp$mz >= mzmin & chp$mz <= mzmax)
  chromPeak <- chromPeaks[rownum,]
  
  #filter out fitting spectra
  filteredMs2spectra <- .getDdaMS2Scans(chromPeak, ms2spectra)
  
  return(filteredMs2spectra)
}

### Helper function
### function for DDA data 
### Credit: Michael Witting; https://github.com/michaelwitting/metabolomics2018/blob/master/R/msLevelMerge.R
### MS2 spectra need to be list of spectrum2 objects
.getDdaMS2Scans <- function(chromPeak, ms2spectra, mzTol = 0.01, rtTol = 20) {
  
  #make a new list
  filteredMs2spectra <- list()
  
  #isolate only the ones within range
  for(i in 1:length(ms2spectra)) {
    
    #isolate Ms2 spectrum
    ms2spectrum <- ms2spectra[[i]]
    
    #check if within range of peak
    if(abs(chromPeak[4] - ms2spectrum@rt) < rtTol & abs(chromPeak[1] - ms2spectrum@precursorMz) < mzTol) {
      filteredMs2spectra <- c(filteredMs2spectra, ms2spectrum)
    }
  }
  
  #return list with filtered ms2 spectra
  return(filteredMs2spectra)
}

#' Plot selected M2 spectra for an m/z feature
#' @description This function creates a plot of the user selected precursor m/z.
#' @param ms2 Spectrum2 class object containing all of the spectra for the selected
#' m/z feature.
#' @author Jasmine Chong \email{jasmine.chong@mail.mcgill.ca},
#' Mai Yamamoto \email{yamamoto.mai@mail.mcgill.ca}, and Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)


PlotMS2Spectra <- function(ms2, spectra = 1){
  
  MSnbase::plot(ms2[[spectra]])
  
}  



# -----------------------------------4. Annotation-----------------------

##' @references Kuhl C, Tautenhahn R, Boettcher C, Larson TR, Neumann S (2012). 
##' "CAMERA: an integrated strategy for compound spectra extraction and annotation of 
##' liquid chromatography/mass spectrometry data sets." Analytical Chemistry, 84, 283-289. 
##' http://pubs.acs.org/doi/abs/10.1021/ac202450g.

####### ---------- ====== Function Kit From CAMERA ======= ----------- ######/
getPeaks_selection <- function(xs, method="medret", value="into"){
  
  if (!class(xs) == "xcmsSet") {
    stop ("Parameter xs is no xcmsSet object\n")
  }
  
  # Testing if xcmsSet is grouped
  if (nrow(xs@groups) > 0 && length(xs@filepaths) > 1) {
    # get grouping information
    groupmat <- xs@groups
    # generate data.frame for peaktable
    ts <- data.frame(cbind(groupmat, groupval(xs, method=method, value=value)), row.names = NULL)
    #rename column names
    cnames <- colnames(ts)
    if (cnames[1] == 'mzmed') {
      cnames[1] <- 'mz' 
    } else { 
      stop ('Peak information ?!?')
    }
    if (cnames[4] == 'rtmed') {
      cnames[4] <- 'rt' 
    } else {
      stop ('Peak information ?!?')
    }
    colnames(ts) <- cnames
  } else if (length(rownames(xs@phenoData)) == 1) { #Contains only one sample? 
    ts <- xs@peaks
  } else {
    stop ('First argument must be a xcmsSet with group information or contain only one sample.')
  }
  
  return(as.matrix(ts))
}
groupval <- function(object, method = c("medret", "maxint"), value = "index", intensity = "into") {
  
  if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
    stop("mSet$xcmsSet is not been grouped.")
  }
  
  method <- match.arg(method)
  
  peakmat <- object@peaks;  groupmat <- object@groups;  groupindex <- object@groupidx
  
  sampnum <- seq(length = length(rownames(object@phenoData)))
  retcol <- match("rt", colnames(peakmat))
  intcol <- match(intensity, colnames(peakmat))
  sampcol <- match("sample", colnames(peakmat))
  
  values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
  
  if (method == "medret") {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - median(peakmat[groupindex[[i]],retcol])))]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  } else {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  }
  
  if (value != "index") {
    values <- peakmat[values,value]
    dim(values) <- c(length(groupindex), length(sampnum))
  }
  colnames(values) <- rownames(object@phenoData)
  rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
  
  values
}
percentOutput <- function(len.old, len.add, len.max, cnt){
  len <- len.old + len.add;
  perc <- round((len) / len.max * 100)
  perc <- perc %/% 10 * 10;
  if (perc != cnt && perc != 0) { 
    print(perc,' '); 
    cnt2 <- perc;
    eval.parent(substitute(cnt <- cnt2))
  }
  if (.Platform$OS.type == "windows"){ 
    flush.console();
  }
  eval.parent(substitute(len.old <- len))
}
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5)  abs(x - round(x)) < tol
calcIsotopeMatrix <- function(maxiso=4){
  
  if(!is.numeric(maxiso)){
    stop("Parameter maxiso is not numeric!\n")  
  } else if(maxiso < 1 | maxiso > 8){
    stop(paste("Parameter maxiso must between 1 and 8. ",
               "Otherwise use your own IsotopeMatrix.\n"),sep="")
  }
  
  isotopeMatrix <- matrix(NA, 8, 4);
  colnames(isotopeMatrix) <- c("mzmin", "mzmax", "intmin", "intmax")
  
  isotopeMatrix[1, ] <- c(1.000, 1.0040, 1.0, 150)
  isotopeMatrix[2, ] <- c(0.997, 1.0040, 0.01, 200)
  isotopeMatrix[3, ] <- c(1.000, 1.0040, 0.001, 200)
  isotopeMatrix[4, ] <- c(1.000, 1.0040, 0.0001, 200)
  isotopeMatrix[5, ] <- c(1.000, 1.0040, 0.00001, 200)
  isotopeMatrix[6, ] <- c(1.000, 1.0040, 0.000001, 200)
  isotopeMatrix[7, ] <- c(1.000, 1.0040, 0.0000001, 200)
  isotopeMatrix[8, ] <- c(1.000, 1.0040, 0.00000001, 200)  
  
  return(isotopeMatrix[1:maxiso, , drop=FALSE])
  
}
findIsotopesPspec <- function(isomatrix, mz, ipeak, int, params){
  #isomatrix - isotope annotations (5 column matrix)
  #mz - m/z vector, contains all m/z values from specific pseudospectrum
  #int - int vector, see above
  #maxiso - how many isotopic peaks are allowed
  #maxcharge - maximum allowed charge
  #devppm - scaled ppm error
  #mzabs - absolut error in m/z
  
  #matrix with all important informationen
  spectra <- matrix(c(mz, ipeak), ncol=2)
  int     <- int[order(spectra[, 1]), , drop=FALSE]
  spectra <- spectra[order(spectra[, 1]), ];    
  cnt     <- nrow(spectra);
  #isomatrix <- matrix(NA, ncol=5, nrow=0)
  #colnames(isomatrix) <- c("mpeak", "isopeak", "iso", "charge", "intrinsic")
  
  #calculate error
  error.ppm <- params$devppm * mz;
  #error.abs <- ),1, function(x) x + params$mzabs*rbind(1,2,3)));
  
  #for every peak in pseudospectrum
  for ( j in 1:(length(mz) - 1)){
    #create distance matrix
    MI <- spectra[j:cnt, 1] - spectra[j, 1];
    #Sum up all possible/allowed isotope distances + error(ppm of peak mz and mzabs)
    max.index <- max(which(MI < (sum(params$IM[1:params$maxiso, "mzmax"]) + error.ppm[j] + params$mzabs )))
    #check if one peaks falls into isotope window
    if(max.index == 1){
      #no promising candidate found, move on
      next;
    }
    
    #IM - isotope matrix (column diffs(min,max) per charge, row num. isotope)
    IM <- t(sapply(1:params$maxcharge,function(x){
      mzmin <- (params$IM[, "mzmin"]) / x;
      mzmax <- (params$IM[, "mzmax"]) / x;
      error <-      (error.ppm[j]+params$mzabs) / x
      res   <- c(0,0);
      for(k in 1:length(mzmin)){
        res <- c(res, mzmin[k]+res[2*k-1], mzmax[k]+res[2*k])
      }
      res[seq(1,length(res),by=2)] <- res[seq(1,length(res),by=2)]-error
      res[seq(2,length(res),by=2)] <- res[seq(2,length(res),by=2)]+error
      return (res[-c(1:2)])
    } ))
    
    #Sort IM to fix bug, with high ppm and mzabs values 
    #TODO: Find better solution and give feedback to user!
    IM <- t(apply(IM,1,sort))
    
    #find peaks, which m/z value is in isotope interval
    hits <- t(apply(IM, 1, function(x){ findInterval(MI[1:max.index], x)}))
    rownames(hits) <- c(1:nrow(hits))
    colnames(hits) <- c(1:ncol(hits))
    hits[which(hits==0)] <-NA
    hits <- hits[, -1, drop=FALSE]
    hits.iso <- hits%/%2 + 1;
    
    
    #check occurence of first isotopic peak
    for(iso in 1:min(params$maxiso,ncol(hits.iso))){
      hit <- apply(hits.iso,1, function(x) any(naOmit(x)==iso))
      hit[which(is.na(hit))] <- TRUE
      if(all(hit)) break;
      hits.iso[!hit,] <- t(apply(hits.iso[!hit,,drop=FALSE],1, function(x) {
        if(!all(is.na(x))){
          ini <- which(x > iso)
          if(!is.infinite(ini) && length(ini) > 0){
            x[min(ini):ncol(hits.iso)] <- NA  
          }
        }
        x
      }))
    }
    
    #set NA to 0
    hits[which(is.na(hits.iso))] <- 0
    #check if any isotope is found
    hit <- apply(hits, 1, function(x) sum(x)>0)
    #drop nonhits  
    hits <- hits[hit, , drop=FALSE]
    
    #if no first isotopic peaks exists, next
    if(nrow(hits) == 0){
      next;
    }
    
    #getting max. isotope cluster length
    #TODO: unique or not????
    #isolength <- apply(hits, 1, function(x) length(which(unique(x) %% 2 !=0)))
    #isohits - for each charge, length of peak within intervals
    isohits   <- lapply(1:nrow(hits), function(x) which(hits[x, ] %% 2 !=0))
    isolength <- sapply(isohits, length)
    
    #Check if any result is found
    if(all(isolength==0)){
      next;
    }
    
    #itensity checks
    #candidate.matrix
    #first column - how often succeded the isotope intensity test
    #second column - how often could a isotope int test be performed
    candidate.matrix <- matrix(0, nrow=length(isohits), ncol=max(isolength)*2);
    
    for(iso in 1:length(isohits)){
      for(candidate in 1:length(isohits[[iso]])){
        for(sample.index in c(1:ncol(int))){
          #Test if C12 Peak is NA
          if(!is.na(int[j, sample.index])){              
            #candidate.matrix[maxIso, 1] <- candidate.matrix[maxIso, 1] + 1
          }
          charge <- as.numeric(row.names(hits)[iso])
          int.c12 <- int[j, sample.index]
          isotopePeak <- hits[iso,isohits[[iso]][candidate]]%/%2 + 1;
          if(isotopePeak == 1){
            #first isotopic peak, check C13 rule
            int.c13 <- int[isohits[[iso]][candidate]+j, sample.index];
            int.available <- all(!is.na(c(int.c12, int.c13)))
            if (int.available){
              theo.mass <- spectra[j, 1] * charge; #theoretical mass
              numC      <- abs(round(theo.mass / 12)); #max. number of C in molecule
              inten.max <- int.c12 * numC * 0.011; #highest possible intensity
              inten.min <- int.c12 * 1    * 0.011; #lowest possible intensity
              if((int.c13 < inten.max && int.c13 > inten.min) || !params$filter){
                candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }else{
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }
            } else {
              #todo
            } 
          } else {
            #x isotopic peak
            int.cx <- int[isohits[[iso]][candidate]+j, sample.index];
            int.available <- all(!is.na(c(int.c12, int.cx)))
            if (int.available) {
              intrange <- c((int.c12 * params$IM[isotopePeak,"intmin"]/100),
                            (int.c12 * params$IM[isotopePeak,"intmax"]/100))
              #filter Cx isotopic peaks muss be smaller than c12
              if(int.cx < intrange[2] && int.cx > intrange[1]){
                candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1                        
              }else{
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }
            } else {
              candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
            }#end int.available
          }#end if first isotopic peak
        }#for loop samples
      }#for loop candidate
    }#for loop isohits
    
    #calculate ratios
    candidate.ratio <- candidate.matrix[, seq(from=1, to=ncol(candidate.matrix),
                                              by=2)] / candidate.matrix[, seq(from=2, 
                                                                              to=ncol(candidate.matrix), by=2)];
    if(is.null(dim(candidate.ratio))){
      candidate.ratio <- matrix(candidate.ratio, nrow=nrow(candidate.matrix))
    }
    if(any(is.nan(candidate.ratio))){
      candidate.ratio[which(is.nan(candidate.ratio))] <- 0;
    }
    
    #decision between multiple charges or peaks
    for(charge in 1:nrow(candidate.matrix)){
      if(any(duplicated(hits[charge, isohits[[charge]]]))){
        #One isotope peaks has more than one candidate
        ##check if problem is still consistent
        for(iso in unique(hits[charge, isohits[[charge]]])){
          if(length(index <- which(hits[charge, isohits[[charge]]]==iso))== 1){
            #now duplicates next
            next;
          }else{
            #find best
            index2 <- which.max(candidate.ratio[charge, index]);
            save.ratio <- candidate.ratio[charge, index[index2]]
            candidate.ratio[charge,index] <- 0
            candidate.ratio[charge,index[index2]] <- save.ratio
            index <- index[-index2]
            isohits[[charge]] <- isohits[[charge]][-index]
          }
        }
      }#end if
      
      for(isotope in 1:ncol(candidate.ratio)){
        if(candidate.ratio[charge, isotope] >= params$minfrac){
          isomatrix <- rbind(isomatrix, 
                             c(spectra[j, 2],
                               spectra[isohits[[charge]][isotope]+j, 2], 
                               isotope, as.numeric(row.names(hits)[charge]), 0))
        } else{
          break;
        }
      }
    }#end for charge
  }#end for j
  
  return(isomatrix)
}
getEICs <- function(xraw,peaks,maxscans=length(xraw@scantime)) {
  npeaks <- dim(peaks)[1]; scans <- length(xraw@scantime)
  eics <- matrix(as.numeric(0),npeaks,maxscans)
  for (p in 1:npeaks) {
    eics[p,1:scans] <- as.integer(getEIC(xraw,massrange=c(peaks[p,"mzmin"],peaks[p,"mzmax"]))$intensity)
  }
  eics
}
create.matrix <- function(dim1,dim2) {
  x <- matrix()
  length(x) <- dim1*dim2
  dim(x) <- c(dim1,dim2)
  x
}
getAllPeakEICs <- function(mSet, index=NULL){
  
  #Checking parameter index
  if(is.null(index)){
    stop("Parameter index is not set.\n")
  }else if(length(index) != nrow(mSet$AnnotateObject$groupInfo)){
    stop("Index length must equals number of peaks.\n")
  }
  
  nfiles <- length(mSet$xcmsSet@filepaths)
  scantimes <- list()
  
  if(nfiles == 1){
    #Single sample experiment
    if (file.exists(mSet$xcmsSet@filepaths[1])) { 
      
      mSet_splits <- mSet$onDiskData
      scantimes[[1]] <- scan_length <- sapply(mSet_splits, length)
      maxscans <- max(scan_length)
      
      #xraw <- xcmsRaw(filepaths(mSet$xcmsSet)[1],profstep=0)
      #maxscans <- length(xraw@scantime)
      #scantimes[[1]] <- xraw@scantime
      mset$onDiskData <- mSet_splits
      
      pdata <- as.data.frame(mSet$xcmsSet@peaks) 
      
      EIC <- create.matrix(nrow(pdata),maxscans)
      EIC[,] <- getEIC4Peaks(mset,pdata,maxscans)#getEIC4Peaks(xraw,pdata,maxscans)
      
    } else {
      stop('Raw data file:',mSet$xcmsSet@filepaths[1],' not found ! \n');
    }
  } else {
    #Multiple sample experiment
    gval <- groupval(mSet$xcmsSet);
    
    message('Generating EIC\'s . ',appendLF = F)
    
    #na flag, stores if sample contains NA peaks
    na.flag <- 0;   maxscans <- 0;
   
    mSet_splits <- split(mSet$onDiskData,fromFile(mSet$onDiskData))
    message('.',appendLF = F)
    
    if (file.exists(mSet$xcmsSet@filepaths[1])) { 
      
      scan_length <- sapply(mSet_splits, length)
      
      
      maxscans <- max(scan_length)
      message('.',appendLF = F)
      
    } else {
      
      stop('Raw data file:',mSet$xcmsSet@filepaths[1],' not found ! \n');
    
    }
    
    #generate EIC Matrix
    EIC <- create.matrix(nrow(gval),maxscans)
    
    
    mset <-list();mset$env <- new.env(parent=.GlobalEnv)
    message('.')
    
    
    #loop over all samples
    for (f in 1:nfiles){
      
      message(paste("Detecting",basename(mSet$xcmsSet@filepaths)[f]," ... "),appendLF = F)
      
      #which peaks should read from this sample    
      idx.peaks <- which(index == f);
      
      #check if we need data from sample f
      if(length(idx.peaks) == 0){
        next;
      }
      
      #check if raw data file of sample f exists
      if (file.exists(mSet$xcmsSet@filepaths[f])) {
        #read sample
        
        scantimes[[f]] <- scan_length[f]
        
        pdata <- as.data.frame(mSet$xcmsSet@peaks[gval[idx.peaks,f],,drop=FALSE]) # data for peaks from file f
        
        #Check if peak data include NA values
        if(length(which(is.na(pdata[,1]))) > 0){
          na.flag <- 1;
        }
        mset$onDiskData <- mSet_splits[[f]]
        
        #Generate raw data according to peak data
        EIC[idx.peaks,] <- getEIC4Peaks(mset,pdata,maxscans)
        
      } else {
        stop('Raw data file:',mSet$xcmsSet@filepaths[f],' not found ! \n')
      }
    
    }
    
    if(na.flag ==1){
      print("Warning: Found NA peaks in selected sample.");
    }
  }
  invisible(list(scantimes=scantimes,EIC=EIC)); 
}
getEIC4Peaks <- function(mset,peaks,maxscans){
  
  mset$env$mz <- lapply(MSnbase::spectra(mset$onDiskData, BPPARAM = SerialParam()), mz)
  mset$env$intensity <- MSnbase::intensity(mset$onDiskData, BPPARAM = SerialParam())
  
  mset$scantime <- MSnbase::rtime(mset$onDiskData)
  
  valCount <- cumsum(lengths(mset$env$mz, FALSE))
  mset$scanindex <- as.integer(c(0, valCount[-length(valCount)])) ## Get index vector for C calls
  
  npeaks <- dim(peaks)[1]; 
  scans  <- length(mset$scantime);
  eics <- matrix(NA,npeaks,maxscans);
  
  mset$env$mz <- as.double(unlist(mset$env$mz))
  mset$env$intensity <- as.double(unlist(mset$env$intensity))
  scan_time_leng <- as.integer(length(mset$scantime))
  
  message(npeaks," peaks found ! ")

  for (p in 1:npeaks) {
    
    timerange       <- c(peaks[p,"rtmin"],peaks[p,"rtmax"]);
    tidx <- which((mset$scantime >= timerange[1]) & (mset$scantime <= timerange[2]));
    if(length(tidx)>0){
      scanrange <- range(tidx);
    }else{
      scanrange <- 1:scans;
    }
    massrange <- c(peaks[p,"mzmin"],peaks[p,"mzmax"]);
    eic <- .Call(C_getEIC,
                 mset$env$mz,
                 mset$env$intensity,
                 as.integer(mset$scanindex),
                 as.double(massrange),
                 as.integer(scanrange),
                 scan_time_leng, 
                 PACKAGE ='MetaboAnalystR' )$intensity;
    
    eic[eic==0] <- NA;
    eics[p,scanrange[1]:scanrange[2]] <- eic; 
  }
  eics
}
calcCiS<- function(mSet, EIC=EIC, corval=0.75, pval=0.05, psg_list=NULL ){
  
  #Columns Peak 1, Peak 2, correlation coefficienct, Pseudospectrum Index
  resMat <- create.matrix(100000,4);
  colnames(resMat) <- c("x","y","cor","ps")
  cnt <- 0;
  
  npeaks <- nrow(mSet$AnnotateObject$groupInfo);
  
  ncl <- sum(sapply(mSet$AnnotateObject$pspectra, length));
  npeaks.global <- 0; #Counter for % bar
  npspectra <- length(mSet$AnnotateObject$pspectra);
  
  #Check if object have been preprocessed with groupFWHM
  if(npspectra < 1){
    npspectra <- 1;
    mSet$AnnotateObject$pspectra[[1]] <- seq(1:nrow(mSet$AnnotateObject$groupInfo));
    print('Calculating peak correlations in 1 group. '); 
    lp <- -1;
    pspectra_list     <- 1;
    mSet$AnnotateObject$psSamples  <- 1;
  }else{
    if(is.null(psg_list)){
      print(paste('Calculating peak correlations in',npspectra,'Groups... ')); 
      lp <- -1;
      pspectra_list <- 1:npspectra;
    }else{
      print(paste('Calculating peak correlations in',length(psg_list),'Groups... ')); 
      lp <- -1;
      pspectra_list <- psg_list;
      ncl <- sum(sapply(mSet$AnnotateObject$pspectra[psg_list],length));
    }
  }
  
  if(dim(EIC)[2] != npeaks){
    EIC <- t(EIC);
    #Second check, otherwise number of peaks != number of EIC curves
    if(dim(EIC)[2] != npeaks){
      stop(paste("Wrong dimension of EIC. It has ",dim(EIC)[1]," Rows for ",npeaks,"peaks",sep=""));
    }
  }
  lp <- -1;
  #Iterate over all PS-spectra
  pb <- progress_bar$new(format = "Generating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = T, width= 75)
  
  for(j in 1:length(pspectra_list)){
    i  <- pspectra_list[j];
    pi <- mSet$AnnotateObject$pspectra[[i]];
    npi <- length(pi);
    
    if( ((npi^2)/2 + cnt)  >= nrow(resMat)){
      #resize resMat
      resMat <- rbind(resMat,create.matrix(100000,4));
    }
    
    pb$tick();
    
    #Need at least two peaks for correlation
    if(npi < 2) {
      next;
    }
    
    #Suppress warnings
    options(warn = -1);
    res <- Hmisc::rcorr(EIC[, pi], type="pearson")
    options(warn = 0);
    
    #Set lower triangle to NA
    res$r[lower.tri(res$r,diag = TRUE)] <- NA;
    res$P[lower.tri(res$P,diag = TRUE)] <- NA;
    
    #Find peaks with have correlation higher corr_threshold and p <= 0.05
    index <- which(( res$r > corval) & (res$P <= pval))
    if( (length(index) + cnt)  >= nrow(resMat)){
      #resize resMat
      resMat <- rbind(resMat, create.matrix(max(length(index)+1,100000),4));
    }
    
    if(length(index) > 0){
      for( x in 1:(length(index))){
        col <- index[x] %/% npi + 1;
        row <- index[x] %%  npi;
        if(row == 0){
          row <- npi;
          col <- col - 1;
        }
        xi <- pi[row];
        yi <- pi[col];
        #y > x should be satisfied for every value, since we have set lower.tri to NA
        cnt<-cnt+1;
        resMat[cnt,] <- c(xi,yi,res$r[row, col],i);
      }
    }else{
      next;
    }
  }
  
  
  return(invisible(resMat[1:cnt,,drop=FALSE]))
}
calcPC.hcs <- function(mSet, ajc=NULL,psg_list=NULL) {
  
  npspectra <- length(mSet$AnnotateObject$pspectra);
  pspectra  <- mSet$AnnotateObject$pspectra
  psSamples <- mSet$AnnotateObject$psSamples;
  npeaks.global <- 0;
  ncl <- sum(sapply(mSet$AnnotateObject$pspectra, length));
  colnames(ajc)[3] <- c("weight") ##todo: Change to generell ajc interface
  
  #Information for % output
  if(is.null(psg_list)){
    print(paste('Calculating graph cross linking in', npspectra, 'Groups...'));
    lperc <- -1;
    pspectra_list <- 1:npspectra;
    ncl <- sum(sapply(mSet$AnnotateObject$pspectra, length));
  }else{
    print(paste('Calculating graph cross linking in',length(psg_list),'Groups...'));
    lperc <- -1;
    pspectra_list <- psg_list;
    ncl <- sum(sapply(mSet$AnnotateObject$pspectra[psg_list], length));
  }
  
  #peak counter
  npeaks <- 0;
  lp <- -1;
  pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = T, width= 75)
  
  for(j in 1:length(pspectra_list)){
    i  <- pspectra_list[j];#index of pseudospectrum
    pi <- mSet$AnnotateObject$pspectra[[i]]; #peak_id in pseudospectrum
    names(pi) <- as.character(pi);
    
    #percent output
    pb$tick();
    #end percent output
    
    index <- which(ajc[,4] == i)
    if(length(index) < 1){
      g <- ftM2graphNEL(matrix(nrow=0,ncol=2),V=as.character(pi),edgemode="undirected")
    }else{
      g <- ftM2graphNEL(ajc[index,1:2,drop=FALSE],W=ajc[index,3,drop=FALSE], V=as.character(pi), edgemode="undirected");
    }
    hcs <- highlyConnSG(g);
    
    #order cluster after size
    cnts <- sapply(hcs$clusters,length);
    grps <- 1:length(hcs$clusters);     
    grps <- grps[order(cnts, decreasing = TRUE)]
    
    for (ii in 1:length(grps)){
      if(ii==1){
        #save old pspectra number
        pspectra[[i]] <- as.integer(hcs$cluster[[grps[ii]]])
        pi[hcs$cluster[[grps[ii]]]] <- NA
      } else {
        npspectra <- npspectra +1
        pspectra[[npspectra]] <- as.integer(hcs$cluster[[grps[ii]]])
        pi[hcs$cluster[[grps[ii]]]] <- NA
        psSamples[npspectra] <- psSamples[i]
      }
    }
    index <- which(!is.na(pi));
    if(length(index)>0){
      for(ii in 1:length(index)){
        npspectra <- npspectra +1
        pspectra[[npspectra]] <- pi[index[ii]]
        psSamples[npspectra] <- psSamples[i]
      }
    }
    
  }
  
  mSet$AnnotateObject$pspectra  <- pspectra;
  mSet$AnnotateObject$psSamples <- psSamples;
  
  print(paste("New number of ps-groups: ",length(pspectra)));
  return(mSet)
}
findAdducts <- function(mSet, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, rules=NULL, 
                        max_peaks=100, psg_list=NULL, intval="maxo",maxcharge){
  multFragments=FALSE;
  # Scaling ppm factor
  devppm <- ppm / 1000000;
  # counter for % bar
  npeaks.global <- 0;
  
  # get mz values from peaklist
  imz    <- mSet$AnnotateObject$groupInfo[, "mz"];
  
  #number of pseudo-spectra
  npspectra <- length(mSet$AnnotateObject$pspectra);
  
  #If groupCorr or groupFHWM have not been invoke, select all peaks in one sample
  if(npspectra < 1){ 
    npspectra <- 1;
    mSet$AnnotateObject$pspectra[[1]] <- seq(1:nrow(mSet$AnnotateObject$groupInfo)); 
  }
  
  if(mSet$AnnotateObject$sample == 1 && length(rownames(mSet$xcmsSet@phenoData)) == 1){
    ##one sample case 
    mint <- mSet$AnnotateObject$groupInfo[, intval];
  }else{
    ##multiple sample
    if(is.na(mSet$AnnotateObject$sample[1])){
      index <- 1:length(mSet$xcmsSet@filepaths);
    }else{
      index <- mSet$AnnotateObject$sample;
    }
    
    print("Generating peak matrix for peak annotation!");
    mint     <- groupval(mSet$xcmsSet,value=intval)[, index, drop=FALSE];
    peakmat  <- mSet$xcmsSet@peaks;
    groupmat <- mSet$xcmsSet@groups;
    
    imz <- groupmat[, "mzmed"];
    irt <- groupmat[, "rtmed"];
    int.val <- c();
    nsample <- length(mSet$AnnotateObject$sample);
  }
  
  
  # isotopes
  isotopes  <- mSet$AnnotateObject$isotopes;
  # adductlist
  derivativeIons <- vector("list", length(imz));
  
  # other variables
  oidscore <- c();
  index    <- c();
  annoID   <- matrix(ncol=4, nrow=0)
  annoGrp  <- matrix(ncol=4, nrow=0)
  colnames(annoID)  <-  colnames(mSet$AnnotateObject$annoID)
  colnames(annoGrp) <-  colnames(mSet$AnnotateObject$annoGrp)
  
  ##Examine polarity and rule set
  if(!(mSet$AnnotateObject$polarity == "")){
    print(paste("Polarity is set in annotaParam:", mSet$AnnotateObject$polarity));
    if(is.null(rules)){
      if(!is.null(mSet$AnnotateObject$ruleset)){
        rules <- mSet$AnnotateObject$ruleset;
      }else{ 
        print("Ruleset could not read from object! Recalculate");
        rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                           polarity=mSet$AnnotateObject$polarity, 
                           lib.loc= .libPaths(), multFragments=multFragments);
        mSet$AnnotateObject$ruleset <- rules;
      }
    }else{ 
      mSet$AnnotateObject$ruleset <- rules;
      print("Found and use user-defined ruleset!");
    }
  }else{
    if(!is.null(polarity)){
      if(polarity %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, 
                             nh=2, polarity=polarity, lib.loc= .libPaths(),
                             multFragments=multFragments);
        }else{ print("Found and use user-defined ruleset!");}
        mSet$AnnotateObject$polarity <- polarity;
      }else stop("polarity mode unknown, please choose between positive and negative.")
    }else if(length(mSet$xcmsSet@polarity) > 0){
      index <- which(sampclass(mSet$xcmsSet) == mSet$AnnotateObject$category)[1] + mSet$AnnotateObject$sample-1;
      if(mSet$xcmsSet@polarity[index] %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, 
                             nh=2, polarity=mSet$xcmsSet@polarity[index], 
                             lib.loc= .libPaths(), multFragments=multFragments);
        }else{ print("Found and use user-defined ruleset!");}
        mSet$AnnotateObject$polarity <- polarity;
      }else stop("polarity mode in xcmsSet unknown, please define variable polarity.")
    }else stop("polarity mode could not be estimated from the xcmsSet, please define variable polarity!")
    #save ruleset
    mSet$AnnotateObject$ruleset <- rules;
  }
  
  ##Run as single or parallel mode
  runParallel <- 0;
  
  if(mSet$AnnotateObject$runParallel$enable == 1){
    if(!(is.null(mSet$AnnotateObject$runParallel$cluster)) || mpi.comm.size() > 0 ){
      runParallel <- 1;
    }else{
      warning("CAMERA runs in parallel mode, but no slaves are spawned!\nRun in single core mode!\n");
      runParallel <- 0;
    }
  }else{
    runParallel <- 0;
  }
  
  if("quasi" %in% colnames(rules)){
    #backup for old rule sets
    quasimolion <- which(rules[, "quasi"]== 1) 
  }else{
    quasimolion <- which(rules[, "mandatory"]== 1)
  }
  
  #Remove recognized isotopes from annotation m/z vector
  for(x in seq(along = isotopes)){
    if(!is.null(isotopes[[x]])){
      if(isotopes[[x]]$iso != 0){
        imz[x] <- NA;
      }
    }
  }
  
  #counter for % bar
  npeaks    <- 0; 
  massgrp   <- 0;
  ncl <- sum(sapply(mSet$AnnotateObject$pspectra, length));  
  
  if (runParallel == 1) { ## ... we run in parallel mode
    if(is.null(psg_list)){
      print(paste('Calculating possible adducts in',npspectra,'Groups... '));
      lp <- -1;
      pspectra_list <- 1:npspectra;
    }else{
      print(paste('Calculating possible adducts in',length(psg_list),'Groups... ')); 
      lp <- -1;
      pspectra_list <- psg_list;
    }
    
    argList <- list();
    
    cnt_peak <- 0;
    if(is.null(max_peaks)){
      max_peaks=100;
    }
    paramsGlobal <- list();
    if("typ" %in% colnames(rules)){
      rules.idx <- which(rules[, "typ"]== "A")
      parent <- TRUE;
    }else{
      #backup for old rule sets
      rules.idx <- 1:nrow(rules);
      parent <- FALSE;
    }
    
    #Add params to env
    paramsGlobal <- list()
    paramsGlobal$pspectra <- mSet$AnnotateObject$pspectra;
    paramsGlobal$imz <- imz;
    paramsGlobal$rules <- rules;
    paramsGlobal$mzabs <- mzabs;
    paramsGlobal$devppm <- devppm;
    paramsGlobal$isotopes <- isotopes;
    paramsGlobal$quasimolion <- quasimolion;
    paramsGlobal$parent <- parent;
    paramsGlobal$rules.idx <- rules.idx;
    #create params
    #paramsGlobal <- list2env(params)
    
    params <- list();
    
    for(j in 1:length(pspectra_list)){
      i <- pspectra_list[j];
      params$i[[length(params$i)+1]] <- i;
      cnt_peak <- cnt_peak+length(mSet$AnnotateObject$pspectra[[i]]);
      if(cnt_peak > max_peaks || j == length(pspectra_list)){
        argList[[length(argList)+1]] <- params
        cnt_peak <- 0;
        params <- list();
      }
    }
    
    #Some informationen for the user
    print(paste("Parallel mode: There are",length(argList), "tasks."))
    
    if(is.null(mSet$AnnotateObject$runParallel$cluster)){
      #Use MPI
      result <- xcmsPapply(argList, annotateGrpMPI2, paramsGlobal)
    }else{
      #For snow
      result <- xcms:::xcmsClusterApply(cl=mSet$AnnotateObject$runParallel$cluster, 
                                        x=argList, fun=annotateGrpMPI, 
                                        msgfun=msgfun.snowParallel,
                                        paramsGlobal)
    }
    
    for(ii in 1:length(result)){
      if(length(result[[ii]]) == 0){
        next;
      }
      for(iii in 1:length(result[[ii]])){
        hypothese <- result[[ii]][[iii]];
        if(is.null(hypothese)){
          next;
        }
        charge <- 0;
        old_massgrp <- 0;
        index <- argList[[ii]]$i[[iii]];
        ipeak <- mSet$AnnotateObject$pspectra[[index]];
        for(hyp in 1:nrow(hypothese)){
          peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
          if(old_massgrp != hypothese[hyp,"massgrp"]) {
            massgrp <- massgrp+1;
            old_massgrp <- hypothese[hyp,"massgrp"];
            annoGrp <- rbind(annoGrp,c(massgrp,hypothese[hyp,"mass"],
                                       sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp),"score"]),i) ) 
          }
          
          if(parent){
            annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID","parent")]))  
          }else{
            annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID")],NA))
          }
          
        }
      }
    }
    
    derivativeIons <- getderivativeIons(annoID,annoGrp,rules,length(imz));
    
    mSet$AnnotateObject$derivativeIons <- derivativeIons;
    mSet$AnnotateObject$annoID  <- annoID;
    mSet$AnnotateObject$annoGrp <- annoGrp;
    return(object)
  } else {
    ##Parallel Core Mode
    if(is.null(psg_list)){
      print(paste('Calculating possible adducts in',npspectra,'Groups... ')); 
      lp <- -1;
      pspectra_list <- 1:npspectra;
    }else{
      print(paste('Calculating possible adducts in',length(psg_list),'Groups... ')); 
      lp <- -1;
      pspectra_list <- psg_list;
      sum_peaks <- sum(sapply(mSet$AnnotateObject$pspectra[psg_list],length));
    }
    
    if("typ" %in% colnames(rules)){
      rules.idx <- which(rules[, "typ"]== "A")
      parent <- TRUE;
    }else{
      #backup for old rule sets
      rules.idx <- 1:nrow(rules);
      parent <- FALSE;
    }
    along = pspectra_list;
    pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(along), clear = T, width= 75)
    
    for(j in seq(along)){
      i <- pspectra_list[j];
      
      #peak index for those in pseudospectrum i
      ipeak <- mSet$AnnotateObject$pspectra[[i]];
      
      #percent output
      pb$tick();
      #end percent output
      
      #check if the pspec contains more than one peak 
      if(length(ipeak) > 1){
        
        hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx=rules.idx);
        #save results
        if(is.null(hypothese)){
          next;
        }
        charge <- 0;
        old_massgrp <- 0;
        
        #combine annotation hypotheses to annotation groups for one compound mass
        for(hyp in 1:nrow(hypothese)){
          peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
          if(old_massgrp != hypothese[hyp, "massgrp"]) {
            massgrp <- massgrp + 1;
            old_massgrp <- hypothese[hyp, "massgrp"];
            annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"], 
                                        sum(hypothese[ which(hypothese[, "massgrp"] == old_massgrp), "score"]), i) ) 
          }
          if(parent){
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], ipeak[hypothese[hyp, "parent"]]))
          }else{
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], NA))
          }
        }
      }
    }
    
    derivativeIons <- getderivativeIons(annoID, annoGrp, rules, length(imz));
    
   
    mSet$AnnotateObject$derivativeIons <- derivativeIons;
    mSet$AnnotateObject$annoID  <- annoID;
    mSet$AnnotateObject$annoGrp <- annoGrp;
    return(mSet)
  }
}
getPeaklist <- function(mSet, intval="into") {
  
  if (! intval %in% colnames(mSet$xcmsSet@peaks)) {
    stop("unknown intensity value!")
  }
  
  #generate peaktable
  #Check if xcmsSet contains only one sample
  if(mSet$AnnotateObject$sample == 1 && length(rownames(mSet$xcmsSet@phenoData)) == 1){
    #intval is here ignored since all intensity values are already contained
    peaktable <- mSet$AnnotateObject$groupInfo;
  }else {
    #Case of xcmsSet with multiple samples
    #Use groupInfo information and replace intensity values
    peaktable <- mSet$AnnotateObject$groupInfo;
    
    #get intensity values from xcmsSet
    grpval <- groupval(mSet$xcmsSet, value=intval);
    
    #get column range for replacement
    grpval.ncol <- ncol(grpval)
    start <- ncol(peaktable) - grpval.ncol +1;
    ende  <- start + grpval.ncol - 1; 
    
    peaktable[, start:ende] <- grpval;
  }
  
  #allocate variables for CAMERA output
  adduct   <- vector("character", nrow(mSet$AnnotateObject$groupInfo));
  isotopes <- vector("character", nrow(mSet$AnnotateObject$groupInfo));
  pcgroup  <- vector("character", nrow(mSet$AnnotateObject$groupInfo));
  
  #default polarity set to positive
  polarity <- "+";
  
  if(length(mSet$AnnotateObject$polarity) > 0){
    if(mSet$AnnotateObject$polarity == "negative"){
      polarity <- "-";
    }
  }
  
  #First isotope informationen and adduct informationen
  for(i in seq(along = isotopes)){
    #check if adduct annotation is present for peak i
    if(length(mSet$AnnotateObject$derivativeIons) > 0 && !(is.null(mSet$AnnotateObject$derivativeIons[[i]]))) {
      #Check if we have more than one annotation for peak i
      if(length(mSet$AnnotateObject$derivativeIons[[i]]) > 1) {
        #combine ion species name and rounded mass hypophysis
        names <- paste(mSet$AnnotateObject$derivativeIons[[i]][[1]]$name, signif(mSet$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
        for(ii in 2:length(mSet$AnnotateObject$derivativeIons[[i]])) {
          names <- paste(names, mSet$AnnotateObject$derivativeIons[[i]][[ii]]$name, signif(mSet$AnnotateObject$derivativeIons[[i]][[ii]]$mass, 6));
        }
        #save name in vector adduct
        adduct[i] <- names;
      } else {
        #Only one annotation
        adduct[i] <- paste(mSet$AnnotateObject$derivativeIons[[i]][[1]]$name, signif(mSet$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
      }
    } else {
      #no annotation empty name
      adduct[i] <- ""; 
    }
    
    #Check if we have isotope informationen about peak i
    if(length(mSet$AnnotateObject$isotopes) > 0&& !is.null(mSet$AnnotateObject$isotopes[[i]])) {
      num.iso <- mSet$AnnotateObject$isotopes[[i]]$iso;
      #Which isotope peak is peak i?
      if(num.iso == 0){
        str.iso <- "[M]";
      } else { 
        str.iso <- paste("[M+", num.iso, "]", sep="")
      }
      #Multiple charged?
      if(mSet$AnnotateObject$isotopes[[i]]$charge > 1){
        isotopes[i] <- paste("[", mSet$AnnotateObject$isotopes[[i]]$y, "]", str.iso, mSet$AnnotateObject$isotopes[[i]]$charge, polarity, sep="");
      }else{
        isotopes[i] <- paste("[", mSet$AnnotateObject$isotopes[[i]]$y, "]", str.iso, polarity, sep="");
      }
    } else { 
      #No isotope informationen available
      isotopes[i] <- ""; 
    }
  }
  
  #Have we more than one pseudospectrum?
  if(length(mSet$AnnotateObject$pspectra) < 1){
    pcgroup <- 0;
  } else {
    for(i in seq(along = mSet$AnnotateObject$pspectra)){
      index <- mSet$AnnotateObject$pspectra[[i]];
      pcgroup[index] <- i;
    }
  }
  
  rownames(peaktable)<-NULL;#Bugfix for: In data.row.names(row.names, rowsi, i) :  some row.names duplicated:
  return(invisible(data.frame(peaktable, isotopes, adduct, pcgroup, stringsAsFactors=FALSE, row.names=NULL)));
}
sampclass <- function(object) {
  if (ncol(object@phenoData) >0) {
    if(any(colnames(object@phenoData)=="class")){
      sclass <- object$class
      ## in any rate: transform class to a character vector
      ## and generate a new factor on that with the levels
      ## being in the order of the first occurrence of the
      ## elements (i.e. no alphanumeric ordering).
      sclass <- as.character(sclass)
      sclass <- factor(sclass, levels=unique(sclass))
      return(sclass)
    }
    interaction(object@phenoData, drop=TRUE)
  } else {
    factor()
  }
}
calcRules <- function (maxcharge=3, mol=3, nion=2, nnloss=1,
                       nnadd=1, nh=2, polarity=NULL, lib.loc = .libPaths(), multFragments=FALSE){
  
  r <- new("ruleSet")
  
  r <- setDefaultLists(r, lib.loc=lib.loc)
  r <- readLists(r)  
  r <- setParams(r, maxcharge=maxcharge, mol=mol, nion=nion,
                 nnloss=nnloss, nnadd=nnadd, nh=nh, polarity=polarity, lib.loc = lib.loc)  
  if(multFragments){
    r <- generateRules2(r)
  }else{
    r <- generateRules(r)
  }
  
  return(r@rules)
}

generateRules <- function (object) {
  
  maxcharge=object@maxcharge
  mol=object@mol
  nion=object@nion
  nnloss=object@nnloss
  nnadd=object@nnadd 
  nh=object@nh
  polarity=object@polarity
  
  ionlist=object@ionlist
  neutralloss=object@neutralloss
  neutraladdition=object@neutraladdition              
  
  rules=object@rules
  
  name<-c();
  nmol<-c();
  charge<-c();
  massdiff<-c();
  oidscore<-c();
  quasi<-c();
  ips<-c();
  
  ##Erzeuge Regeln
  tmpname   <- c();
  tmpnmol   <- c();
  tmpcharge <- 0;
  tmpmass   <- 0;
  tmpips    <- 0;
  
  ## Molekülionen
  if(polarity=="positive"){
    ## Wasserstoff, hard codiert
    for(k in 1:mol){
      if(k == 1){
        str    <- "";
        tmpips <- 1.0;
      }else{
        str    <-  k;
        tmpips <- 0.5;
      };
      name     <- append(name, paste("[", str, "M+H]+", sep=""));
      charge   <- append(charge, 1);
      massdiff <- append(massdiff, 1.007276);
      nmol     <- append(nmol, k);
      if(k == 1) {
        quasi  <- append(quasi, 1);
      } else { 
        quasi  <- append(quasi, 0);
      };
      oidscore <- append(oidscore, 1);
      ips      <- append(ips, tmpips)
      name     <- append(name,paste("[",str,"M+2H]2+",sep=""));
      charge<-append(charge,2);
      massdiff<-append(massdiff,2.014552);
      nmol<-append(nmol,k);quasi<-append(quasi,0);
      oidscore<-append(oidscore,2);
      ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M+3H]3+",sep=""));
      charge<-append(charge,3);
      massdiff<-append(massdiff,3.021828);
      nmol<-append(nmol,k);
      quasi<-append(quasi,0);
      oidscore<-append(oidscore,3);
      ips<-append(ips,tmpips)
      oid<-3;
      for(i in 1:nrow(ionlist)){
        if(ionlist[i,2]<=0) {next;}
        if(ionlist[i,2]==1){
          name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]2+",sep=""));
        }else{
          name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]",ionlist[i,2]+1,"+",sep=""));
        }
        charge <- append(charge,ionlist[i,2]+1);
        massdiff <- append(massdiff,ionlist[i,3]+1.007276);
        nmol <- append(nmol,k);
        quasi <- append(quasi,0);
        oidscore <- append(oidscore,oid+i);
        if(tmpips>0.75){
          ips<-append(ips,0.5)
        }else{
          ips<-append(ips,tmpips);
        }#Austausch
      }
      oid<-oid+nrow(ionlist);
      coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
      if(length(list<-which(ionlist[,2]<=0))>0){
        coeff[,list]<-0;
      }
      coeff<-unique(coeff);
      coeff<-cbind(coeff,rep(0,nrow(coeff)));
      coeff<-coeff[-1,]
      tmp<-NULL;
      for(i in 1:nrow(ionlist)){
        if(ionlist[i,2]<=0)next;
        ## Austausch erstmal nur einen pro Ion
        tmp<-rbind(tmp,t(apply(coeff,1,function(x) {x[i]<-x[i]+1;x[nrow(ionlist)+1]<-1;x})));
      }
      coeff<-unique(rbind(coeff,tmp));
      i <- 1
      while (i <= nrow(coeff)){
        tmpname<-paste("[",str,"M",sep="");
        tmpcharge<-0;tmpmass<-0;
        for(ii in 1:(ncol(coeff)-1)){
          if(coeff[i,ii]>0){
            if(coeff[i,ii]>1){
              tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
            }else{
              tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
            }
            tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
            tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
          }
        }
        if(coeff[i,ncol(coeff)]>0){
          ## Austausch hat stattgefunden, einfach bsp 1
          tmpname<-paste(tmpname,"-H",sep="");
          tmpcharge<-tmpcharge-1;
          tmpmass<-tmpmass-1.007276;
          tmpips<-0.25;
        }
        if(tmpcharge>1){
          tmpname<-paste(tmpname,"]",tmpcharge,"+",sep="")
        }else{
          tmpname<-paste(tmpname,"]+",sep="")
        }
        name<-append(name,tmpname)
        charge<-append(charge,tmpcharge)
        massdiff<-append(massdiff,tmpmass)
        nmol <- append(nmol,k);
        oidscore<-append(oidscore,oid+i)
        if(sum(coeff[i,])==1&& k==1){
          quasi <- append(quasi,1);
          ips <-append(ips,tmpips);
        }else{
          quasi <- append(quasi,0);
          if(tmpips>0.75){
            ips <- append(ips,0.75);
          }else{
            ips <- append(ips,tmpips);
          }
        }
        i <- i+1
      }
    }
    oid<-max(oidscore);
    
    ## Create Neutral Addition
    index<-which(quasi==1)
    
    if (nrow(neutraladdition)>0) {
      for(i in 1:nrow(neutraladdition)) {
        if(length(index2<-which(ionlist[,2]>0))>0){
          for(ii in 1:length(index2)){
            if(ionlist[index2[ii],2] > 1){
              name    <-  append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"+",sep=""));
            }else{
              name    <-  append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]+",sep=""));
            }
            charge <- append(charge,ionlist[index2[ii],2]);
            massdiff <- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
            nmol <- append(nmol,1);
            quasi <- append(quasi,0);
            oidscore    <-  append(oidscore,oid+1);oid<-oid+1;
            ips<-append(ips,0.5);
          }
        }
        if(neutraladdition[i,1]=="NaCOOH"){next;}
        name<-append(name,paste("[M+H+",neutraladdition[i,1],"]+",sep=""));
        charge<-append(charge,+1);
        massdiff<- append(massdiff,neutraladdition[i,2]+1.007276);
        nmol<-append(nmol,1);
        quasi<-append(quasi,0)
        oidscore<-append(oidscore,oid+1);oid<-oid+1;
        ips<-append(ips,0.5);
      }
      oid<-max(oidscore);
    }
    
    ##Erzeuge Neutral loss
    index<-which(quasi==1)
    for(i in 1:nrow(neutralloss)){
      for(ii in 1:maxcharge){
        if(ii > 1){
          name<-append(name,paste("[M+",ii,"H-",neutralloss[i,1],"]",ii,"+",sep=""));
        }else {name<-append(name,paste("[M+H-",neutralloss[i,1],"]+",sep=""));}
        charge<-append(charge,ii);
        massdiff<-  append(massdiff,-neutralloss[i,2]+1.007276*ii);
        nmol<-append(nmol,1);
        quasi<-append(quasi,0)
        oidscore<-append(oidscore,oid+1);oid<-oid+1;
        ips<-append(ips,0.25);
      }
    }
    ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
    if(length(index<-which(ruleset[,"charge"]>maxcharge))>0){
      ruleset<- ruleset[-index,];
    }
  }else if(polarity=="negative"){
    ## Wasserstoff, hard codiert
    for(k in 1:mol){
      if(k==1){str<-"";tmpips<-1.0;}else{str<-k;tmpips<-0.5};
      name<-append(name,paste("[",str,"M-H]-",sep=""));
      charge<-append(charge,-1);massdiff<-append(massdiff,-1.007276);nmol<-append(nmol,k);if(k==1){quasi<-append(quasi,1);}else{quasi<-append(quasi,0);};oidscore<-append(oidscore,1);ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M-2H]2-",sep=""));charge<-append(charge,-2);massdiff<-append(massdiff,-2.014552);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,2);ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M-3H]3-",sep=""));charge<-append(charge,-3);massdiff<-append(massdiff,-3.021828);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,3);ips<-append(ips,tmpips)
      oid<-3;
      for(i in 1:nrow(ionlist)){
        if(ionlist[i,2]>=0){
          if(ionlist[i,2] == 1){
            name<-append(name,paste("[",str,"M-2H+",ionlist[i,1],"]-",sep=""));
            charge <- append(charge,ionlist[i,2]-2);
            massdiff<- append(massdiff,ionlist[i,3]-(2*1.007276));
            nmol <- append(nmol,k);
            quasi <- append(quasi,0);
            oidscore<-append(oidscore,oid+i);
            ips<-append(ips,0.25);
            next;
          } else {
            if(ionlist[i,2] > maxcharge) {next;}
            localCharge <- ionlist[i,2]+1
            name<-append(name,paste("[",str,"M-",localCharge,"H+",ionlist[i,1],"]-",sep=""));
            charge <- append(charge,ionlist[i,2]-localCharge);
            massdiff<- append(massdiff,ionlist[i,3]-(localCharge*1.007276));
            nmol <- append(nmol,k);
            quasi <- append(quasi,0);
            oidscore<-append(oidscore,oid+i);
            ips<-append(ips,0.25);
            next;
          }
        }
        if(ionlist[i,2]== -1){
          name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]2-",sep=""));
        }else{
          name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]",ionlist[i,2]+1,"-",sep=""));
        }
        charge <- append(charge,ionlist[i,2]-1);
        massdiff<- append(massdiff,ionlist[i,3]-1.007276);
        nmol <- append(nmol,k);
        quasi <- append(quasi,0);
        oidscore<-append(oidscore,oid+i);
        ips<-append(ips,tmpips);
        #Austausch
      }
      oid<-oid+nrow(ionlist);
      coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
      if(length(list<-which(ionlist[,2]>=0))>0){
        coeff[,list]<-0;
      }
      coeff<-unique(coeff);
      coeff<-cbind(coeff,rep(0,nrow(coeff)));
      coeff<-coeff[-1,] ## remove first row
      i <- 1
      while (i <= nrow(coeff)){
        tmpname<-paste("[",str,"M",sep="");
        tmpcharge<-0;tmpmass<-0;
        for(ii in 1:(ncol(coeff)-1)){
          if(coeff[i,ii]>0){
            if(coeff[i,ii]>1){
              tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
            }else{
              tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
            }
            tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
            tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
          }
        }
        if(coeff[i,ncol(coeff)]>0){
          #Austausch hat stattgefunden, einfach bsp 1
          tmpname<-paste(tmpname,"-H",sep="");
          tmpcharge<-tmpcharge-1;
          tmpmass<-tmpmass-1.007276;
          tmpips<-0.5;
        }
        if(tmpcharge< -1){
          tmpname<-paste(tmpname,"]",abs(tmpcharge),"-",sep="")
        }else{
          tmpname<-paste(tmpname,"]-",sep="")
        }
        name<-append(name,tmpname)
        charge<-append(charge,tmpcharge)
        massdiff<-append(massdiff,tmpmass)
        nmol <-append(nmol,k);
        oidscore<-append(oidscore,oid+i)
        if(sum(coeff[i,])==1&& k==1){
          quasi   <-append(quasi,1);
        }else{
          quasi <-append(quasi,0);
        }
        ips <-append(ips,tmpips);
        i <- i+1
      }
    }
    oid<-max(oidscore);
    
    ##Erzeuge Neutral Addition
    index<-which(quasi==1)
    for(i in 1:nrow(neutraladdition)){
      if(length(index2<-which(ionlist[,2]<0))>0){
        for(ii in 1:length(index2)){
          if(ionlist[index2[ii],2]< -1){
            name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"-",sep=""));
          }else{
            name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]-",sep=""));
          }
          charge <- append(charge,ionlist[index2[ii],2]);
          massdiff<- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
          nmol <- append(nmol,1);
          quasi <- append(quasi,0);
          oidscore<-append(oidscore,oid+1);oid<-oid+1;
          ips<-append(ips,0.5);
        }
      }
      name<-append(name,paste("[M-H+",neutraladdition[i,1],"]-",sep=""));
      charge<-append(charge,-1);
      massdiff<- append(massdiff,neutraladdition[i,2]-1.007276);
      nmol<-append(nmol,1);
      quasi<-append(quasi,0)
      oidscore<-append(oidscore,oid+1);oid<-oid+1;
      ips<-append(ips,0.5);
    }
    oid<-max(oidscore);
    
    ##Erzeuge Neutral loss
    index<-which(quasi==1)
    for(i in 1:nrow(neutralloss)){
      name<-append(name,paste("[M-H-",neutralloss[i,1],"]-",sep=""));
      charge<-append(charge,-1);
      massdiff<-  append(massdiff,-neutralloss[i,2]-1.007276);
      nmol<-append(nmol,1);
      quasi<-append(quasi,0)
      oidscore<-append(oidscore,oid+1);oid<-oid+1;
      ips<-append(ips,0.25);
    }
    ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
    if(length(index<-which(ruleset[,"charge"]< -maxcharge))>0){
      ruleset<- ruleset[-index,];
    }
  }else stop("Unknown error")
  
  ## Update object rules and return ruleset
  object@rules=ruleset
  object;
}
generateRules2 <- function (object) {
  
  maxcharge=object@maxcharge
  mol=object@mol
  nion=object@nion
  nnloss=object@nnloss
  nnadd=object@nnadd 
  nh=object@nh
  polarity=object@polarity
  
  ionlist=object@ionlist
  neutralloss=object@neutralloss
  neutraladdition=object@neutraladdition              
  
  rules=object@rules
  
  ##Create Rules
  ruleset     <- matrix(nrow=0, ncol=8);
  colnames(ruleset) <- c("name", "nmol","charge", "massdiff", "typ","mandatory",
                         "score", "parent")
  
  tmpname   <- c();
  tmpcharge <- 0;
  tmpmass   <- 0;
  tmpionparent <- NA;
  massH <- 1.007276
  
  ##Positive Rule set
  if(polarity == "positive"){
    charge <- "+"
    chargeValue <- 1
  }else if(polarity == "negative"){
    charge <- "-"
    chargeValue <- -1
  }else{
    stop("Unknown polarity mode in rule set generation! Debug!\n")
  }
  
  #Hydrogen part is hard coded
  for(k in 1:mol) {
    if(k == 1){
      #For M+H
      str    <- "";
      tmpips <- 1.5;
      quasi  <- 1 
    }else{
      #For xM+H
      str    <-  k;
      tmpips <- 0;
      quasi  <- 0 
    }
    
    for(xh in seq.int(nh)){
      if(xh == 1){
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, "H]", charge, sep=""), k, chargeValue, 
                                        massH*chargeValue, "A", 
                                        quasi, tmpips+0.5, tmpionparent));
      } else {
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, xh, "H]", xh, charge, sep=""), 
                                        k,xh*chargeValue, massH*xh*chargeValue, "A", 0, 0.5, tmpionparent+1));
      }
    }
    
    for(i in 1:nrow(ionlist)){
      #Add change of kat- respectively anion against one hydrogen
      if(ionlist[i, 2] > 0 & charge == "-"){
        if(ionlist[i, 2] == 1){
          sumcharge <- "";
          hdiff <- 1;
        }else{
          sumcharge <- ionlist[i, 2];
          hdiff <- ionlist[i,2]-1;
        }
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "-", sumcharge,"H]", "",
                                              charge, sep=""), k, ionlist[i, 2]*chargeValue, 
                                        ionlist[i, 3]-massH*(1+hdiff),
                                        "A", 0, 0.25, tmpionparent));  
      }
      
      #xM+H + additional Kation like Na or K      
      if(ionlist[i,2] <= 0 & chargeValue > 0) {
        #Ions with negative charge, when polarity is positive
        next;
      } else if(ionlist[i, 2] >= 0 & chargeValue < 0){
        #Ions with positive charge, when polarity is negative
        next;
      }
      
      #Add Rule to Ruleset
      if(abs(ionlist[i, 2]) == 1){
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "]2", 
                                              charge, sep=""), k, ionlist[i, 2]+1*chargeValue, 
                                        ionlist[i, 3]+massH*chargeValue, "A", 0, 0.25, tmpionparent));
      }else{
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i, 1], "]", 
                                              ionlist[i, 2]+(1*chargeValue),
                                              charge, sep=""), k, ionlist[i, 2]+1*chargeValue, 
                                        ionlist[i, 3]+massH*chargeValue, "A" ,0, 0.25, tmpionparent));
      }
      
    }#End for loop nrow(ionlist)
    
    ##Coeff - coefficient Matrix, for generating rules with
    #combination of kat- or anionsexchange ions like [M-2H+Na] (M-H and change of H against Na)
    coeff <- expand.grid(rep(list(0:nion), nrow(ionlist)))
    if(chargeValue > 0){
      index <- which(ionlist[, 2] <= 0);
    }else{
      index <- which(ionlist[, 2] >= 0);
    }
    
    if(length(index) > 0){
      coeff[, index] <- 0;
    }
    
    tmp <- NULL;
    for(i in 1:nrow(ionlist)){
      if((chargeValue > 0 & ionlist[i, 2] <= 0) | (chargeValue < 0 & ionlist[i,2] >=0)){
        next;
      }
      #Austausch erstmal nur einen pro Ion
      tmp <- rbind(tmp, t(apply(coeff, 1, function(x) {
        x[i] <- x[i]+1;
        x[nrow(ionlist)+1] <- -1;
        x }
      )));
    }
    coeff <- cbind(coeff, rep(0, nrow(coeff)));
    colnames(coeff)[4] <- "Var4"
    colnames(tmp)[4] <- "Var4"
    coeff <- unique(rbind(coeff, tmp));
    
    for(i in 1:nrow(coeff)){
      if(sum(coeff[i, 1:nrow(ionlist)]) > 2 | sum(coeff[i, 1:nrow(ionlist)]) < 1){
        next;
      }
      
      tmpname   <- paste("[",str,"M",sep="");
      tmpcharge <- 0;
      tmpmass   <- 0;
      
      for(ii in 1:(ncol(coeff))){
        if(coeff[i,ii] > 0){
          if(coeff[i,ii] > 1){
            tmpname <- paste(tmpname, "+", coeff[i, ii], ionlist[ii, 1], sep="");
          }else{
            tmpname <- paste(tmpname, "+", ionlist[ii, 1], sep="");
          }
          tmpcharge <- tmpcharge + coeff[i, ii] * ionlist[ii, 2];
          tmpmass   <- tmpmass + coeff[i, ii] * ionlist[ii, 3];
        } else if (coeff[i,ii] < 0){
          tmpname <- paste(tmpname, "-H", sep="");
          tmpcharge <- tmpcharge - 1;
          tmpmass   <- tmpmass  - massH;
        }
      }
      
      if(abs(tmpcharge) > 1){
        tmpname <- paste(tmpname, "]", tmpcharge, charge, sep="")
      }else{
        tmpname <- paste(tmpname, "]", charge, sep="")
      }
      
      if(tmpcharge > maxcharge | tmpcharge == 0){
        next;
      }
      
      if(sum(coeff[i, ]) == 1 && k == 1 && coeff[i, 4] >=0){
        ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 1, 0.75, tmpionparent));
      }else{
        ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 0, 0.25, tmpionparent));
      }
    }#end for loop nrow(coeff)
  }#end for loop k
  
  # Create neutral addition to M+H from list
  for(i in 1:nrow(neutraladdition)){
    #Add neutral ion to only M+H
    ruleset <- rbind(ruleset, cbind(paste("[M", charge, "H+", neutraladdition[i, 1], "]", charge, sep="") , 1, chargeValue, 
                                    neutraladdition[i, 2]+(massH*chargeValue), "A", 0, 0.25, 1));
  }
  
  ## Add neutral loss from list to ruleset
  for(i in 1:nrow(neutralloss)){
    ruleset <- rbind(ruleset, cbind(paste("-", neutralloss[i, 1], sep=""), 1, 0, 
                                    -neutralloss[i, 2], "F", 0, 0.25, 1));
    #Eliminate rules with charge > maxcharge
    if(length(index <- which(ruleset[, "charge"] > maxcharge)) > 0){
      ruleset <- ruleset[-index, ];
    }
  }
  ruleset <- as.data.frame(ruleset, stringsAsFactors=FALSE)
  class(ruleset$nmol) <- "numeric"
  class(ruleset$charge) <- "numeric"
  class(ruleset$massdiff) <- "numeric"
  class(ruleset$mandatory) <- "numeric"
  class(ruleset$score) <- "numeric"
  class(ruleset$parent) <- "numeric"
  object@rules=ruleset
  object;
  
  return(object);
}
setDefaultLists <- function(object, lib.loc=.libPaths()) {
  
  ##Read Tabellen
  object@ionlistfile <- system.file('lists/ions.csv', package = "MetaboAnalystR",
                                    lib.loc=lib.loc)[1]
  if (!file.exists(object@ionlistfile)) stop('ions.csv not found.')
  
  object@neutrallossfile <- system.file('lists/neutralloss.csv', 
                                        package = "MetaboAnalystR", lib.loc=lib.loc)[1]
  if (!file.exists(object@neutrallossfile)) stop('neutralloss.csv not found.')
  
  object@neutraladditionfile <- system.file('lists/neutraladdition.csv', 
                                            package = "MetaboAnalystR", lib.loc=lib.loc)[1]
  if (!file.exists(object@neutraladditionfile)) stop('neutraladdition.csv not found.')
  object
}
readLists <- function(object) {
  
  object@ionlist <- read.table(object@ionlistfile, header=TRUE, dec=".", sep=",",
                               as.is=TRUE, stringsAsFactors = FALSE);
  
  object@neutralloss <- read.table(object@neutrallossfile, header=TRUE, dec=".", sep=",",
                                   as.is=TRUE, stringsAsFactors = FALSE);
  
  object@neutraladdition <- read.table(object@neutraladditionfile,
                                       header=TRUE, dec=".", sep=",",
                                       as.is=TRUE, stringsAsFactors = FALSE);            
  object
}
setParams <- function (object, maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2, 
                       polarity=NULL, lib.loc=NULL) {
  object@maxcharge=maxcharge 
  object@mol=mol
  object@nion=nion
  object@nnloss=nnloss 
  object@nnadd=nnadd 
  object@nh=nh
  object@polarity=polarity
  object@lib.loc=lib.loc
  object
}
annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx) {
  #m/z vector for group i with peakindex ipeak
  mz     <- imz[ipeak];
  naIdx <- which(!is.na(mz))
  
  #Spectrum have only annotated isotope peaks, without monoisotopic peak
  #Give error or warning?
  if(length(na.omit(mz[naIdx])) < 1){
    return(NULL);
  }
  
  ML <- massDiffMatrix(mz[naIdx], rules[rules.idx,]);
  
  hypothese <- createHypothese(ML, rules[rules.idx, ], devppm, mzabs, naIdx);
  
  #create hypotheses
  if(is.null(nrow(hypothese)) || nrow(hypothese) < 2 ){
    return(NULL);
  }
  
  #remove hypotheses, which violates via isotope annotation discovered ion charge 
  if(length(isotopes) > 0){
    hypothese <- checkIsotopes(hypothese, isotopes, ipeak);
  }
  
  if(nrow(hypothese) < 2){
    return(NULL);
  }
  
  #Test if hypothese grps include mandatory ions
  #Filter Rules #2
  if(length(quasimolion) > 0){
    hypothese <- checkQuasimolion(hypothese, quasimolion);
  }
  
  if(nrow(hypothese) < 2){
    return(NULL);
  };
  
  #Entferne Hypothesen, welche gegen OID-Score&Kausalität verstossen!
  hypothese <- checkOidCausality(hypothese, rules[rules.idx, ]);
  if(nrow(hypothese) < 2){
    return(NULL);
  };
  
  #Prüfe IPS-Score
  hypothese <- checkIps(hypothese)
  if(nrow(hypothese) < 2){
    return(NULL)
  }
  
  #We have hypotheses and want to add neutral losses
  if("typ" %in% colnames(rules)){
    hypothese <- addFragments(hypothese, rules, mz)
    
    hypothese <- resolveFragmentConnections(hypothese)
  }
  return(hypothese);
}
massDiffMatrix <- function(m, rules){
  #m - m/z vector
  #rules - annotation rules
  nRules <- nrow(rules);
  DM   <- matrix(NA, length(m), nRules)
  
  for (i in seq_along(m)){
    for (j in seq_len(nRules)){
      DM[i, j] <- (abs(rules[j, "charge"] * m[i]) - rules[j, "massdiff"]) / rules[j, "nmol"]    # ((z*m) - add) /n
    }
  }
  return(DM)
}
createHypothese <- function(ML, rules, devppm, mzabs, naIdx){
  ML.nrow <- nrow(ML);
  ML.vec <- as.vector(ML);
  max.value <- max(round(ML, 0));
  hashmap <- vector(mode="list", length=max.value);
  for(i in 1:length(ML)){
    val <- trunc(ML[i],0);
    if(val>1){
      hashmap[[val]] <- c(hashmap[[val]],i);
    }
  }
  if("ips" %in% colnames(rules)){
    score <- "ips"
  }else{
    score <- "score"
  }
  hypothese <- matrix(NA,ncol=8,nrow=0);
  colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check");
  massgrp <- 1;
  
  for(i in seq(along=hashmap)){
    if(is.null(hashmap[[i]])){
      next;
    }
    
    candidates <- ML.vec[hashmap[[i]]];
    candidates.index <- hashmap[[i]];
    if(i != 1 && !is.null(hashmap[[i-1]]) && min(candidates) < i+(2*devppm*i+mzabs)){
      index <- which(ML.vec[hashmap[[i-1]]]> i-(2*devppm*i+mzabs))
      if(length(index)>0) {
        candidates <- c(candidates, ML.vec[hashmap[[i-1]]][index]);
        candidates.index <- c(candidates.index,hashmap[[i-1]][index]);
      }    
    }
    
    if(length(candidates) < 2){
      next;
    }
    
    tol <- max(2*devppm*mean(candidates, na.rm=TRUE))+ mzabs;
    result <- cutree(hclust(dist(candidates)), h=tol);
    index <- which(table(result) >= 2);
    if(length(index) == 0){
      next;
    }
    m <- lapply(index, function(x) which(result == x));
    for(ii in 1:length(m)){
      ini.adducts <- candidates.index[m[[ii]]];
      for( iii in 1:length(ini.adducts)){
        adduct <- ini.adducts[iii] %/% ML.nrow +1;
        mass   <- ini.adducts[iii] %% ML.nrow;
        if(mass == 0){
          mass <- ML.nrow;
          adduct <- adduct -1;
        }
        hypothese <- rbind(hypothese, c(naIdx[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], mean(candidates[m[[ii]]]),  rules[adduct,score],massgrp ,1));
      }
      if(length(unique(hypothese[which(hypothese[, "massgrp"] == massgrp), "massID"])) == 1){
        ##only one mass annotated
        hypothese <- hypothese[-(which(hypothese[,"massgrp"]==massgrp)),,drop=FALSE]
      }else{
        massgrp <- massgrp +1;
      }
    }
  }
  return(hypothese);
}
checkIsotopes <- function(hypothese, isotopes, ipeak){
  for(hyp in 1:nrow(hypothese)){
    peakid <- ipeak[hypothese[hyp, 1]];
    if(!is.null(isotopes[[peakid]])){
      #Isotope da
      explainable <- FALSE;
      if(isotopes[[peakid]]$charge == abs(hypothese[hyp, "charge"])){
        explainable <- TRUE;
      }
      if(!explainable){
        #delete Rule
        hypothese[hyp,"check"]=0;
      }
    }
  }
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  
  return(hypothese)
}
checkHypothese <- function(hypothese){
  if(is.null(nrow(hypothese))){
    hypothese <- matrix(hypothese, byrow=F, ncol=8)
  } 
  colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check")
  for(i in unique(hypothese[,"massgrp"])){
    if(length(unique(hypothese[which(hypothese[, "massgrp"] == i), "massID"])) == 1){
      ##only one mass annotated
      hypothese <- hypothese[-(which(hypothese[,"massgrp"]==i)), , drop=FALSE]
    } 
  }
  return(hypothese)
}

checkIps <- function(hypothese){
  for(hyp in 1:nrow(hypothese)){
    if(length(which(hypothese[, "massgrp"] == hypothese[hyp, "massgrp"])) < 2){
      hypothese[hyp, "check"] = 0;
    }
  }
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ];
  if(is.null(nrow(hypothese))) {
    hypothese <- matrix(hypothese, byrow=F, ncol=9)
  }
  if(nrow(hypothese) < 1){
    colnames(hypothese)<-c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips","massgrp", "check")
    return(hypothese)
  }
  for(hyp in 1:nrow(hypothese)){
    if(length(id <- which(hypothese[, "massID"] == hypothese[hyp, "massID"] & hypothese[, "check"] != 0)) > 1){
      masses <- hypothese[id, "mass"]
      nmasses <- sapply(masses, function(x) { 
        sum(hypothese[which(hypothese[, "mass"] == x), "score"]) 
      })
      masses <- masses[-which(nmasses == max(nmasses))];
      if(length(masses) > 0){
        hypothese[unlist(sapply(masses, function(x) {which(hypothese[, "mass"]==x)})), "check"]=0;
      }
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  return(hypothese)
}
checkOidCausality <- function(hypothese,rules){
  #check every hypothese grp
  for(hyp in unique(hypothese[,"massgrp"])){
    hyp.nmol <- which(hypothese[, "massgrp"] == hyp & hypothese[, "nmol"] > 1)
    
    for(hyp.nmol.idx in hyp.nmol){
      if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp.nmol.idx, "mass"] & 
                              abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){
        if(hyp.nmol.idx %in% indi){
          #check if [M+H] [2M+2H]... annotate the same molecule
          massdiff <- rules[hypothese[indi, "ruleID"], "massdiff"] / 
            rules[hypothese[indi, "ruleID"], "charge"]
          if(length(indi_new <- which(duplicated(massdiff))) > 0){
            hypothese[hyp.nmol.idx, "check"] <- 0;
          }
        }
      }
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"] == TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  return(hypothese)
}
checkQuasimolion <- function(hypothese, quasimolion){
  hypomass <- unique(hypothese[, "mass"])
  for(mass in 1:length(hypomass)){
    if(!any(quasimolion %in% hypothese[which(hypothese[, "mass"] == hypomass[mass]), "ruleID"])){
      hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
    }else if(is.null(nrow(hypothese[which(hypothese[, "mass"] == hypomass[mass]), ]))){
      hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), , drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  
  return(hypothese)
}

getderivativeIons <- function(annoID, annoGrp, rules, npeaks){
  #generate Vector length npeaks
  derivativeIons <- vector("list", npeaks);
  #intrinsic charge
  #TODO: Not working at the moment
  charge <- 0;
  
  #check if we have annotations
  if(nrow(annoID) < 1){
    return(derivativeIons);
  }
  
  for(i in 1:nrow(annoID)){
    
    peakid  <-  annoID[i, 1];
    grpid   <-  annoID[i, 2];
    ruleid  <-  annoID[i, 3];
    
    if(is.null(derivativeIons[[peakid]])){
      #Peak has no annotations so far
      if(charge == 0 | rules[ruleid, "charge"] == charge){
        derivativeIons[[peakid]][[1]] <- list( rule_id = ruleid, 
                                               charge = rules[ruleid, "charge"], 
                                               nmol = rules[ruleid, "nmol"], 
                                               name = paste(rules[ruleid, "name"]),
                                               mass = annoGrp[grpid, 2])
      }
    } else {
      #Peak has already an annotation
      if(charge == 0 | rules[ruleid, "charge"] == charge){
        derivativeIons[[peakid]][[(length(
          derivativeIons[[peakid]])+1)]] <- list( rule_id = ruleid, 
                                                  charge = rules[ruleid, "charge"],
                                                  nmol = rules[ruleid, "nmol"],
                                                  name=paste(rules[ruleid, "name"]),
                                                  mass=annoGrp[grpid, 2])
      }
    }
    
    charge <- 0;
  }
  return(derivativeIons);
}
getIsotopeCluster <- function(object, number=NULL, value="maxo", 
                              sampleIndex=NULL){
  
  #check values
  if(is.null(object)) { 
    stop("No xsa argument was given.\n"); 
  }else if(!class(object)=="xsAnnotate"){
    stop("Object parameter is no xsAnnotate object.\n");
  }
  
  value <- match.arg(value, c("maxo", "into", "intb"), several.ok=FALSE)
  
  if(!is.null(number) & !is.numeric(number)){
    stop("Number must be NULL or numeric");
  }
  
  if(!is.null(sampleIndex) & !all(is.numeric(sampleIndex))){
    stop("Parameter sampleIndex must be NULL or numeric");
  }
  
  if(is.null(sampleIndex)){
    nSamples <- 1;
  } else if( all(sampleIndex <= length(object@xcmsSet@filepaths) & sampleIndex > 0)){
    nSamples <- length(sampleIndex);
  } else {
    stop("All values in parameter sampleIndex must be lower equal 
         the number of samples and greater than 0.\n")
  }
  
  if(length(sampnames(object@xcmsSet)) > 1){  ## more than one sample
    gvals <- groupval(object@xcmsSet, value=value);
    groupmat <- object@groupInfo;
    iso.matrix <- matrix(0, ncol=nSamples, nrow=length(object@isotopes));
    if(is.null(sampleIndex)){
      for(i in 1:length(object@pspectra)){
        iso.matrix[object@pspectra[[i]],1] <- gvals[object@pspectra[[i]],object@psSamples[i]]; 
      }
    } else {
      for(i in 1:length(object@pspectra)){
        iso.matrix[object@pspectra[[i]], ] <- gvals[object@pspectra[[i]], sampleIndex]
      }
    }
    peakmat <- cbind(groupmat[, "mz"], iso.matrix );
    rownames(peakmat) <- NULL;
    if(is.null(sampleIndex)){
      colnames(peakmat) <- c("mz",value);
    }else{
      colnames(peakmat) <- c("mz", sampnames(object@xcmsSet)[sampleIndex]);
    }
    
    if(any(is.na(peakmat))){
      print("Warning: peak table contains NA values. To remove apply fillpeaks on xcmsSet.");
    }
    
  } else if(length(sampnames(object@xcmsSet)) == 1){  ## only one sample was 
    peakmat <- object@groupInfo[, c("mz", value)];
  } else { 
    stop("sampnames could not extracted from the xcmsSet.\n"); 
  }
  
  #collect isotopes
  
  index <- which(!sapply(object@isotopes, is.null));
  
  tmp.Matrix <- cbind(index, matrix(unlist(object@isotopes[index]), ncol=4, byrow=TRUE))
  colnames(tmp.Matrix) <- c("Index","IsoCluster","Type","Charge","Val")
  
  max.cluster <- max(tmp.Matrix[,"IsoCluster"])
  max.type    <- max(tmp.Matrix[,"Type"])
  
  isotope.Matrix <- matrix(NA, nrow=max.cluster, ncol=(max.type+2));
  invisible(apply(tmp.Matrix,1, function(x) {
    isotope.Matrix[x["IsoCluster"],x["Type"]+2] <<- x["Index"];
    isotope.Matrix[x["IsoCluster"],1] <<- x["Charge"];
  }))
  
  invisible(apply(isotope.Matrix,1, function(x) {
    list(peaks=peakmat[na.omit(x[-1]),],charge=x[1])
  }))
}

naOmit <- function(x) {
  return (x[!is.na(x)]);
}
####### ------------ ======== Bottom of this kit ========= ------------ ######\

# --------------------------------------5.MSread--------------------------
read.MSdata <- function(files, pdata = NULL, msLevel. = NULL, centroided. = NA,
                        smoothed. = NA, cache. = 1L,
                        mode = c("inMemory", "onDisk")) {
  mode <- match.arg(mode)
  ## o normalize the file path, i.e. replace relative path with absolute
  ##   path. That fixes possible problems on Windows with SNOW parallel
  ##   processing and also proteowizard problems on unis system with ~ paths.
  files <- normalizePath(files)
  suppressWarnings(.hasChroms <- MSnbase::hasChromatograms(files))

  if (!length(files)) {
    process <- new("MSnProcess",
                   processing = paste("No data loaded:", date()))
    if (mode == "inMemory")
      res <- new("MSnExp",
                 processingData = process)
    else res <- new("OnDiskMSnExp",
                    processingData = process)
  } else {
    if (mode == "inMemory") {
      if (is.null(msLevel.)) msLevel. <- 2L
      res <- read.InMemMSd.data(files, pdata = pdata, msLevel. = msLevel.,
                               centroided. = centroided., smoothed. = smoothed., cache. = cache.)
    } else { ## onDisk
      res <- read.OnDiskMS.data(files = files, pdata = pdata,
                                msLevel. = msLevel., centroided. = centroided., smoothed. = smoothed.)
    }
  }
  res
}

read.InMemMSd.data <- function(files, pdata, msLevel., centroided., smoothed., cache. = 1) {
  MSnbase:::.testReadMSDataInput(environment())
  if (msLevel. == 1) cache. <- 0
  msLevel. <- as.integer(msLevel.)
  ## Creating environment with Spectra objects
  assaydata <- new.env(parent = emptyenv())
  ioncount <- c()
  ioncounter <- 1
  filenams <- filenums <- c()
  fullhd2 <- fullhdorder <- c()
  fullhdordercounter <- 1
  .instrumentInfo <- list()
  ## List eventual limitations
  if (MSnbase:::isCdfFile(files)) {
    message("Polarity can not be extracted from netCDF files, please set ",
            "manually the polarity with the 'polarity' method.")
  }
  ## ## Idea:
  ## ## o initialize a featureData-data.frame,
  ## ## o for each file, extract header info and put that into
  ##      featureData;
  
  for (f in files) {
    print(paste("Reading MS from",basename(f),"begin !"))
    
    filen <- match(f, files)
    filenums <- c(filenums, filen)
    filenams <- c(filenams, f)
    ## issue #214: define backend based on file format.
    msdata <- mzR::openMSfile(f,backend = NULL)
    .instrumentInfo <- c(.instrumentInfo, list(mzR::instrumentInfo(msdata)))
    fullhd <- mzR::header(msdata)
    ## Issue #325: get centroided information from file, but overwrite if
    ## specified with centroided. parameter.
    if (!is.na(centroided.))
      fullhd$centroided <- as.logical(centroided.)
    spidx <- which(fullhd$msLevel == msLevel.)
    ## increase vectors as needed
    ioncount <- c(ioncount, numeric(length(spidx)))
    fullhdorder <- c(fullhdorder, numeric(length(spidx)))
    if (msLevel. == 1) {
      if (length(spidx) == 0)
        stop("No MS1 spectra in file",f)
      
      pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta", 
                             total = length(spidx), clear = T, width= 75)
      
      for (i in 1:length(spidx)) {
        
        pb$tick();
        
        j <- spidx[i]
        hd <- fullhd[j, ]
        ## Fix missing polarity from netCDF
        pol <- hd$polarity
        if (length(pol) == 0)
          pol <- NA
        .p <- mzR::peaks(msdata, j)
        sp <- new("Spectrum1",
                  rt = hd$retentionTime,
                  acquisitionNum = as.integer(hd$acquisitionNum),
                  scanIndex = as.integer(hd$seqNum),
                  tic = hd$totIonCurrent,
                  mz = .p[, 1],
                  intensity = .p[, 2],
                  fromFile = as.integer(filen),
                  centroided = as.logical(hd$centroided),
                  smoothed = as.logical(smoothed.),
                  polarity = as.integer(pol))
        ## peaksCount
        ioncount[ioncounter] <- sum(.p[, 2])
        ioncounter <- ioncounter + 1
        .fname <-MSnbase:::formatFileSpectrumNames(fileIds=filen,
                                          spectrumIds=i,
                                          nSpectra=length(spidx),
                                          nFiles=length(files))
        assign(.fname, sp, assaydata)
        fullhdorder[fullhdordercounter] <- .fname
        fullhdordercounter <- fullhdordercounter + 1
      }
    } else { ## .msLevel != 1
      if (length(spidx) == 0)
        stop("No MS(n>1) spectra in file", f)
      print(paste("Reading ", length(spidx), " MS", msLevel.,
                  " spectra from file ", basename(f)))
     
      scanNums <- fullhd[fullhd$msLevel == msLevel., "precursorScanNum"]
      if (length(scanNums) != length(spidx))
        stop("Number of spectra and precursor scan number do not match!")
      
      pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta", 
                             total = length(spidx), clear = T, width= 75)
      
      for (i in 1:length(spidx)) {
        
        pb$tick();
        
        j <- spidx[i]
        hd <- fullhd[j, ]
        .p <- mzR::peaks(msdata, j)
        sp <- new("Spectrum2",
                  msLevel = as.integer(hd$msLevel),
                  merged = as.numeric(hd$mergedScan),
                  precScanNum = as.integer(scanNums[i]),
                  precursorMz = hd$precursorMZ,
                  precursorIntensity = hd$precursorIntensity,
                  precursorCharge = as.integer(hd$precursorCharge),
                  collisionEnergy = hd$collisionEnergy,
                  rt = hd$retentionTime,
                  acquisitionNum = as.integer(hd$acquisitionNum),
                  scanIndex = as.integer(hd$seqNum),
                  tic = hd$totIonCurrent,
                  mz = .p[, 1],
                  intensity = .p[, 2],
                  fromFile = as.integer(filen),
                  centroided = as.logical(hd$centroided),
                  smoothed = as.logical(smoothed.),
                  polarity = as.integer(hd$polarity))
        ## peaksCount
        ioncount[ioncounter] <- sum(.p[, 2])
        ioncounter <- ioncounter + 1
        .fname <- MSnbase:::formatFileSpectrumNames(fileIds=filen,
                                          spectrumIds=i,
                                          nSpectra=length(spidx),
                                          nFiles=length(files))
        assign(.fname, sp, assaydata)
        fullhdorder[fullhdordercounter] <- .fname
        fullhdordercounter <- fullhdordercounter + 1
      }
    }
    if (cache. >= 1)
      fullhd2 <- rbind(fullhd2, fullhd[spidx, ])
    
    gc()
    mzR::close(msdata)
    rm(msdata);
    
    
    print(paste("This reading finished !"))
    
  }
  
  ## cache level 2 yet implemented
  cache. <- MSnbase:::testCacheArg(cache., maxCache = 2)
  if (cache. >= 1) {
    fl <- sapply(assaydata, function(x) x@fromFile)
    featnms <- ls(assaydata) ## feature names in final MSnExp
    fl <- fl[featnms] ## reorder file numbers
    stopifnot(all(base::sort(featnms) == base::sort(fullhdorder)))
    fullhdorder <- match(featnms, fullhdorder)
    tmphd <- fullhd2[fullhdorder, ] ## reorder
    ioncount <- ioncount[fullhdorder]
    newhd <- data.frame(fileIdx = fl,
                        retention.time = tmphd$retentionTime,
                        precursor.mz = tmphd$precursorMZ,
                        precursor.intensity = tmphd$precursorIntensity,
                        charge = tmphd$precursorCharge,
                        peaks.count = tmphd$peaksCount,
                        tic = tmphd$totIonCurrent,
                        ionCount = ioncount,
                        ms.level = tmphd$msLevel,
                        acquisition.number = tmphd$acquisitionNum,
                        collision.energy = tmphd$collisionEnergy)
  } else {
    newhd <- NULL ## not used anyway
  }
  .cacheEnv <- MSnbase:::setCacheEnv(list("assaydata" = assaydata,
                                "hd" = newhd),
                           cache., lock = TRUE)
  ## CACHING AS BEEN SUPERSEDED BY THE OnDiskMSnExp IMPLEMENTATION
  ## if cache==2, do not lock assign msdata in .cacheEnv then lock
  ## it and do not close(msdata) above; rm(msdata) is OK
  
  ## Create 'MSnProcess' object
  process <- new("MSnProcess",
                 processing = paste("Data loaded:", date()),
                 files = files,
                 smoothed = smoothed.)
  ## Create 'fdata' and 'pdata' objects
  nms <- ls(assaydata)
  if (is.null(pdata)) {
    .pd <- data.frame(sampleNames = basename(files))
    rownames(.pd) <- .pd$sampleNames
    pdata <- new("AnnotatedDataFrame",
                 data = .pd)
  }
  fdata <- new("AnnotatedDataFrame",
               data = data.frame(
                 spectrum = 1:length(nms),
                 row.names = nms))
  fdata <- fdata[ls(assaydata)] ## reorder features
  ## expriment data slot
  if (length(.instrumentInfo) > 1) {
    cmp <- length(unique(sapply(.instrumentInfo, "[[", 1)))
    if (cmp > 1)
      message("According to the instrument information in the files,\n",
              "the data has been acquired on different instruments!")
    for (nm in names(.instrumentInfo[[1]]))
      .instrumentInfo[[1]][[nm]] <- sapply(.instrumentInfo, "[[", nm)
  }
  expdata <- new("MIAPE",
                 instrumentManufacturer = .instrumentInfo[[1]]$manufacturer,
                 instrumentModel = .instrumentInfo[[1]]$model,
                 ionSource = .instrumentInfo[[1]]$ionisation,
                 analyser = as.character(.instrumentInfo[[1]]$analyzer),
                 detectorType = .instrumentInfo[[1]]$detector)
  ## Create and return 'MSnExp' object

  toReturn <- new("MSnExp",
                  assayData = assaydata,
                  phenoData = pdata,
                  featureData = fdata,
                  processingData = process,
                  experimentData = expdata,
                  .cache = .cacheEnv)
  return(toReturn)
}
read.OnDiskMS.data <- function(files, pdata, msLevel., centroided., smoothed.) {
  MSnbase:::.testReadMSDataInput(environment())
  stopifnot(is.logical(centroided.))
  
  ## Creating environment with Spectra objects
  assaydata <- new.env(parent = emptyenv())
  filenams <- filenums <- c()
  fullhd2 <- fullhdorder <- c()
  fullhdordercounter <- 1
  .instrumentInfo <- list()
  ## List eventual limitations
  if (MSnbase:::isCdfFile(files)) {
    message("Polarity can not be extracted from netCDF files, please set ",
            "manually the polarity with the 'polarity' method.")
  }
  ## Idea:
  ## o initialize a featureData-data.frame,
  featureDataList <- list()
  ## o for each file, extract header info and put that into featureData
  pb <- progress_bar$new(format = "Reading [:bar] :percent Time left: :eta", 
                         total = length(spidx), clear = T, width= 75)

  for (f in files) {
    
    pb$tick();
    
    filen <- match(f, files)
    filenums <- c(filenums, filen)
    filenams <- c(filenams, f)
    ## issue #214: define backend based on file format.
    msdata <- mzR::openMSfile(f,backend = NULL)
    .instrumentInfo <- c(.instrumentInfo, list(mzR::instrumentInfo(msdata)))
    fullhd <- mzR::header(msdata)
    spidx <- seq_len(nrow(fullhd))
    
    ## Don't read the individual spectra, just define the names of
    ## the spectra.
    fullhdorder <- c(fullhdorder,
                     MSnbase:::formatFileSpectrumNames(fileIds=filen,
                                             spectrumIds=seq_along(spidx),
                                             nSpectra=length(spidx),
                                             nFiles=length(files)))
    ## Extract all Spectrum info from the header and put it into the featureData
    fdData <- fullhd[spidx, , drop = FALSE]
    ## rename totIonCurrent and peaksCount, as detailed in
    ## https://github.com/lgatto/MSnbase/issues/105#issuecomment-229503816
    names(fdData) <- sub("peaksCount", "originalPeaksCount", names(fdData))
    ## Add also:
    ## o fileIdx -> links to fileNames property
    ## o spIdx -> the index of the spectrum in the file.
    fdData <- cbind(fileIdx = rep(filen, nrow(fdData)),
                    spIdx = spidx,
                    smoothed = rep(as.logical(smoothed.), nrow(fdData)),
                    fdData, stringsAsFactors = FALSE)
    if (MSnbase:::isCdfFile(f)) {
      ## Add the polarity columns if missing in netCDF
      if (!any(colnames(fdData) == "polarity"))
        fdData <- cbind(fdData, polarity = rep(as.integer(NA),
                                               nrow(fdData)))
    }
    ## Order the fdData by acquisitionNum to force use of acquisitionNum
    
    ## as unique ID for the spectrum (issue #103). That way we can use
    ## the spIdx (is the index of the spectrum within the file) for
    ## subsetting and extracting.
    if (!all(sort(fdData$acquisitionNum) == fdData$acquisitionNum))
      warning(paste("Unexpected acquisition number order detected.",
                    "Please contact the maintainers or open an issue",
                    "on https://github.com/lgatto/MSnbase.",
                    sep = "\n")) ## see issue #160
    fdData <- fdData[order(fdData$acquisitionNum), ]
    featureDataList <- c(featureDataList, list(fdData))
    ## Fix for #151; would be nice if we could remove that at some point.
    gc()
    mzR::close(msdata)
    rm(msdata)
  }
  ## new in version 1.9.8
  lockEnvironment(assaydata, bindings = TRUE)
  .cacheEnv <- setCacheEnv(list("assaydata" = assaydata,
                                "hd" = NULL),
                           level = 0,
                           lock = TRUE)
  
  ## Create 'MSnProcess' object
  process <- new("MSnProcess",
                 processing = paste0("Data loaded [", date(), "]"),
                 files = files,
                 smoothed = NA)
  ## Create 'fdata' and 'pdata' objects
  if (is.null(pdata)) {
    .pd <- data.frame(sampleNames = basename(files))
    rownames(.pd) <- .pd$sampleNames
    pdata <- new("AnnotatedDataFrame",
                 data = .pd)
  }
  ## If we've got a featureDataList, use it
  if (length(featureDataList) > 0) {
    fdata <- do.call(rbind, featureDataList)
    fdata <- cbind(fdata, spectrum = 1:nrow(fdata),
                   stringsAsFactors = FALSE)
    ## Setting rownames on the data.frame not on the AnnotatedDataFrame;
    ## did get strange errors otherwise.
    rownames(fdata) <- fullhdorder
    ## Re-order them
    fdata <- fdata[base::sort(fullhdorder), ]
    fdata <- new("AnnotatedDataFrame", data = fdata)
    ## Re-order the features.
    ## fdata <- fdata[ls(assaydata), ]
  } else fdata <- new("AnnotatedDataFrame")
  
  ## expriment data slot
  if (length(.instrumentInfo) > 1) {
    cmp <- length(unique(sapply(.instrumentInfo, "[[", 1)))
    if (cmp > 1)
      message("According to the instrument information in the files,\n",
              "the data has been acquired on different instruments!")
    for (nm in names(.instrumentInfo[[1]]))
      .instrumentInfo[[1]][[nm]] <- sapply(.instrumentInfo, "[[", nm)
  }
  expdata <- new("MIAPE",
                 instrumentManufacturer = .instrumentInfo[[1]]$manufacturer,
                 instrumentModel = .instrumentInfo[[1]]$model,
                 ionSource = .instrumentInfo[[1]]$ionisation,
                 analyser = as.character(.instrumentInfo[[1]]$analyzer),
                 detectorType = .instrumentInfo[[1]]$detector)
  ## Create ProcessingStep if needed.
  ## Create the OnDiskMSnExp object.
  res <- new("OnDiskMSnExp",
             assayData = assaydata,
             phenoData = pdata,
             featureData = fdata,
             processingData = process,
             experimentData = expdata,
             .cache  =  .cacheEnv)
  if (!is.null(msLevel.)) {
    msLevel. <- as.integer(msLevel.)
    res <- filterMsLevel(res, msLevel.)
  }
  if (any(!is.na(centroided.))) {
    if (length(centroided.) == 1) {
      centroided(res) <- centroided.
    } else {
      for (i in seq_along(centroided.))
        centroided(res, msLevel. = i) <- centroided.[i]
    }
  }
  return(res)
}
xia-lab/MetaboAnalystR3.0 documentation built on May 6, 2020, 11:03 p.m.