R/Spectra_Utils.R

Defines functions .replace.by.lod .getDataPath naOmit valueCount2ScanIndex .exportmSetRuleClass .insertColumn .createProfileMatrix profMat getderivativeIons checkQuasimolion checkOidCausality checkIps checkHypothese checkIsotopes createHypothese massDiffMatrix annotateGrp readLists setDefaultLists sampclass getPeaklist findAdducts calcPC.hcs calcCiS mpi.comm.size combineCalc fastMatchR getAllPeakEICs create.matrix addFragments resolveFragmentConnections findIsotopesPspec calcIsotopeMatrix is.wholenumber groupval getPeaks_selection .getDdaMS2Scans PeakPicking_core PeakPicking_prep filtfft .aggregateMethod2int .get_closest_index .groupval .feature_values .getChromPeakData .applyRtAdjustment .applyRtAdjToChromPeaks .peakIndex .getPeakGroupsRtMatrix .rawMat .narrow_rt_boundaries .match_trim_vectors .match_trim_vector_index na.flatfill fitGauss gauss cent gaussCoverage trimm joinOverlappingPeaks descendMinTol valueCount2ScanIndex MSW.getRidge MSW.getLocalMaximumCWT MSW.extendLength MSW.extendNBase getLocalNoiseEstimate OptiChromPeaks adjustRtimeSubset updateRawSpectraPath creatPeakTable mSet2xcmsSet PerformBlankSubstraction PerformPeakFiling mSet.obiwarp RT.Adjust_peakGroup adjustRtime_obiwarp adjustRtime_peakGroup PerformRTcorrection PerformPeakAlignment Densitygrouping_slave PerformPeakGrouping PeakPicking_MatchedFilter_slave PeakPicking_Massifquant_slave PeakPicking_centWave_slave PerformPeakPicking

Documented in PerformPeakAlignment PerformPeakFiling PerformPeakPicking updateRawSpectraPath

# Content of this script

# 1. raw data function - xcms 
# 2. Peaks Peaking Function - parameters optimization
# 3. MS/MS processing - (moved from preproc_utils)
# 4. Peak Annotation - CAMERA
# 5. MS File Reading - MSnbase

# --------------------1. Raw spectra processing_Section using the xcms way---------------------------------------------

#' @title PerformPeakPicking
#' @description This funciton is used to Perform Peak Picking on an object generated by ImportRawMSdata
#' @param mSet the raw data object read by ImportRawMSData function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam(). Optional.
#' @export
#' @return will return an mSet object with peaks picked
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' # mSet <- PerformPeakPicking(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakPicking<-function(mSet, BPPARAM = bpparam()){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  object <- mSet@rawOnDisk;
  param <- mSet@params;
  
  if(length(object) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  object_mslevel <- MSnbase::filterMsLevel(
    MSnbase::selectFeatureData(object,
                               fcol = c(.MSnExpReqFvarLabels,
                                        "centroided")), msLevel. = 1)
  

  # Splite the MS data 
  object_mslevel <- lapply(seq_along(fileNames(object_mslevel)),
                           FUN = filterFile,
                           object = object_mslevel)
  
  param$.optimize_switch <- .SwapEnv$.optimize_switch;
  
  # Peak picking runnning - centWave mode
  if (param$Peak_method == "centWave")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_centWave_slave, # slave Function of centWave
                        param = param, BPPARAM = BPPARAM)
  
  
  # Peak picking runnning - Massifquant mode
  if (param$Peak_method == "Massifquant")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_Massifquant_slave, # slave Function of Massifquant
                        param = param, BPPARAM = BPPARAM)
  
  
  # Peak picking runnning - MatchedFilter mode
  if (param$Peak_method == "matchedFilter"){
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_MatchedFilter_slave, # slave Function of MatchedFilter
                        param = param, BPPARAM = BPPARAM)
  }
  
  
  # if (param$Peak_method != "matchedFilter" && param$Peak_method != "centWave"){
  #   stop("Only 'centWave' and 'MatchedFilter' are suppoted now !")
  # }
  
  # Picking Cluster
  
  ##### PEAK SUMMARY------------
  
  pks <- vector("list", length(resList))
  for (i in seq_along(resList)) {
    n_pks <- nrow(resList[[i]])
    if (is.null(n_pks))
      n_pks <- 0
    if (n_pks == 0) {
      pks[[i]] <- NULL
      if(!.SwapEnv$.optimize_switch){
        warning("No peaks found in sample number ", i, ".")
      }
    } else {
      pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
    }
    
  } 
  pks <- do.call(rbind, pks)
  
  # Make the chrompeaks embeded
  newFd <- list()
  #newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
  rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
                           seq(from = 1, length.out = nrow(pks)))
  
  newFd$chromPeaks <- pks
  newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
                                              row.names = rownames(pks))
  
  ## mSet Generation
  mSet@peakpicking <- newFd;
  
  # Stop parallel
  register(bpstop());
  
  return(mSet)
}

#' @title PeakPicking_centWave_slave
#' @description PeakPicking_centWave_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_centWave_slave <- function(x, param){
  
  if(!exists(".optimize_switch")){
    if(!is.null(param$.optimize_switch)){
      .optimize_switch <- param$.optimize_switch;
    } else {
      .optimize_switch <- TRUE;
    }
   
  }
 
  # load necessary C code for data processing
  # for raw data processing
  if (is(x, "OnDiskMSnExp")){
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    filenamevalue <- basename(x@processingData@files)
  }
  
  # for parameters optimization
  if (is(x,"list")) {
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    filenamevalue <- NA;
  }
  
  mzs <- lapply(scan.set, MSnbase::mz)
  vals_per_spect <- lengths(mzs, FALSE)
  
  
  #if (any(vals_per_spect == 0))
  #  mzs <- mzs[-which(vals_per_spect == 0)];
  #  x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
  #  warning("Found empty spectra Scan. Filtered Automatically !")
  
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  valsPerSpect = vals_per_spect;
  scantime = rt;
  
  ######   centWaveCore Function
  
  valCount <- cumsum(valsPerSpect);
  scanindex <- as.integer(c(0, valCount[-length(valCount)])); ## Get index vector for C calls
  
  if (!is.double(mz))
    mz <- as.double(mz)
  if (!is.double(int))
    int <- as.double(int)
  ## Fix the mzCenterFun
  #mzCenterFun <- paste("mzCenter",
  #                     gsub(mzCenterFun, pattern = "mzCenter.",
  #                          replacement = "", fixed = TRUE), sep=".")
  
  basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
                 "into", "intb", "maxo", "sn")
  verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale",
                    "scpos", "scmin", "scmax", "lmin", "lmax")
  
  ## Peak width: seconds to scales
  peakwidth <- param$peakwidth
  scalerange <- round((peakwidth / mean(diff(scantime))) / 2)
  
  if (length(z <- which(scalerange == 0)))
    scalerange <- scalerange[-z]
  if (length(scalerange) < 1) {
    warning("No scales? Please check peak width!")
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    return(invisible(nopeaks))
  }
  
  if (length(scalerange) > 1){
    scales <- seq(from = scalerange[1], to = scalerange[2], by = 2)
  } else{
    scales <- scalerange
  }
  
  minPeakWidth <-  scales[1]
  noiserange <- c(minPeakWidth * 3, max(scales) * 3)
  maxGaussOverlap <- 0.5
  minPtsAboveBaseLine <- max(4, minPeakWidth - 2)
  minCentroids <- minPtsAboveBaseLine
  scRangeTol <-  maxDescOutlier <- floor(minPeakWidth / 2)
  scanrange <- c(1, length(scantime))

  ## If no ROIs are supplied then search for them.
  roiList <- list()
  if (length(roiList) == 0) {
    if (.on.public.web() & !.optimize_switch) {
      # Do nothing
    } else if(.optimize_switch) {
      # Do nothing
    } else {
      message("ROI searching under ", param$ppm, " ppm ... ", appendLF = FALSE)
    }
    
    withRestarts(
      tryCatch({
        tmp <- capture.output(roiList <-
                                findmzROIR (mz, int, scanindex, scanrange, scantime, param,
                                           minCentroids))
      },
      error = function(e) {
        if (grepl("m/z sort assumption violated !", e$message)) {
          invokeRestart("fixSort")
        } else {
          simpleError(e)
        }
      }),
      fixSort = function() {
        warning("m/z sort assumption violated !")
      }
    )
    
  }
  
  ## Second stage: process the ROIs
  peaklist <- list();
  Nscantime <- length(scantime)
  lf <- length(roiList)
  # save(roiList, file = paste0("roiList_",Sys.time(),".rda"))
  ## print('\n Detecting chromatographic peaks ... \n % finished: ')
  ## lp <- -1
  
  if (.on.public.web() & !.optimize_switch){
    
    print_mes <- paste0("Detecting peaks in ", length(roiList)," regions of interest of ", filenamevalue, " ...");    
    #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
    
    .SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
    count_total_sample <- .SwapEnv$count_total_sample;
    #write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    
  } else if(.optimize_switch) {
    # do nothing
  } else {
    message("Detecting peaks in ", length(roiList),
            " regions of interest of ", filenamevalue, " ...", appendLF = FALSE)
  }
  
  roiScales = NULL;
 
  for (f in seq_len(lf)) {
    feat <- roiList[[f]]
    N <- feat$scmax - feat$scmin + 1
    peaks <- peakinfo <- NULL
    mzrange <- c(feat$mzmin, feat$mzmax)
    sccenter <- feat$scmin[1] + floor(N/2) - 1
    scrange <- c(feat$scmin, feat$scmax)
    ## scrange + noiserange, used for baseline detection and wavelet analysis
    sr <- c(max(scanrange[1], scrange[1] - max(noiserange)),
            min(scanrange[2], scrange[2] + max(noiserange)))
    
    eic <- getEICR (mz, int, scanindex, mzrange, sr)
    
    ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr)
    d <- eic$intensity;
    td <- sr[1]:sr[2];
    scan.range <- c(sr[1], sr[2]);
    ## original mzROI range
    idxs <- which(eic$scan %in% seq(scrange[1], scrange[2]));
    mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]);
    ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange)
    
    omz <- getMZR(mz, int, scanindex, mzrange, scrange, scantime);
    
    ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange)
    if (all(omz == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    od  <- mzROI.EIC$intensity
    otd <- mzROI.EIC$scan
    if (all(od == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    
    ## scrange + scRangeTol, used for gauss fitting and continuous
    ## data above 1st baseline detection
    ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)],
                                                     scrange[2] + scRangeTol)
    fd <- d[match(ftd, td)]
    
    ## 1st type of baseline: statistic approach
    if (N >= 10*minPeakWidth) {
      ## in case of very long mass trace use full scan range
      ## for baseline detection
      
      noised <- getEICR(mz, int, scanindex, mzrange, scanrange)$intensity
      
      ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity
    } else {
      noised <- d
    }
    ## 90% trimmed mean as first baseline guess
    gz <- which(noised > 0);
    
    if (length(gz) < 3*minPeakWidth){
      noise <- mean(noised)
    } else {
      noise <- mean(noised[gz], trim=0.05)
    }
    
    firstBaselineCheck = TRUE
    ## any continuous data above 1st baseline ?
    if (firstBaselineCheck &
        !continuousPtsAboveThresholdR(fd, threshold = noise,
                                     num = minPtsAboveBaseLine)){
      # save(firstBaselineCheck, file = paste0("sss_",Sys.time(),".rda"))
      next
    }
    
    ## 2nd baseline estimate using not-peak-range
    lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime,
                                    threshold = noise,
                                    num = minPtsAboveBaseLine)
    # 
    # if(.optimize_switch){
    #   write.table(paste0("WK_4_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
    # }
    ## Final baseline & Noise estimate
    baseline <- max(1, min(lnoise[1], noise))
    sdnoise <- max(1, lnoise[2])
    sdthr <-  sdnoise * param$snthresh
    ## is there any data above S/N * threshold ?
    if (!(any(fd - baseline >= sdthr)))
      next
    wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh');
    
    # if(.optimize_switch){
    #   write.table(paste0("WK_5_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
    # }
    
    if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr)))
      next
    if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise
      wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99
    localMax <- MSW.getLocalMaximumCWT(wCoefs);
    
    rL <- MSW.getRidge(localMax)
    wpeaks <- sapply(rL,
                     function(x) {
                       w <- min(seq_along(x),ncol(wCoefs))
                       any(wCoefs[x,w]- baseline >= sdthr)
                     })
    if (any(wpeaks)) {
      wpeaksidx <- which(wpeaks)
      ## check each peak in ridgeList
      for (p in seq_along(wpeaksidx)) {
        opp <- rL[[wpeaksidx[p]]]
        pp <- unique(opp)
        if (length(pp) >= 1) {
          dv <- td[pp] %in% ftd
          if (any(dv)) { ## peaks in orig. data range
            ## Final S/N check
            if (any(d[pp[dv]]- baseline >= sdthr)) {
              ## if(!is.null(roiScales)) {
              ## allow roiScales to be a numeric of length 0
              if(length(roiScales) > 0) {
                ## use given scale
                best.scale.nr <- which(scales == roiScales[[f]])
                if(best.scale.nr > length(opp))
                  best.scale.nr <- length(opp)
              } else {
                ## try to decide which scale describes the peak best
                inti <- numeric(length(opp))
                irange <- rep(ceiling(scales[1]/2), length(opp))
                for (k in seq_along(opp)) {
                  kpos <- opp[k]
                  r1 <- ifelse(kpos - irange[k] > 1,
                               kpos-irange[k], 1)
                  r2 <- ifelse(kpos + irange[k] < length(d),
                               kpos + irange[k], length(d))
                  inti[k] <- sum(d[r1:r2])
                }
                maxpi <- which.max(inti)
                if (length(maxpi) > 1) {
                  m <- wCoefs[opp[maxpi], maxpi]
                  bestcol <- which(m == max(m),
                                   arr.ind = TRUE)[2]
                  best.scale.nr <- maxpi[bestcol]
                } else  best.scale.nr <- maxpi
              }
              
              best.scale <-  scales[best.scale.nr]
              best.scale.pos <- opp[best.scale.nr]
              
              pprange <- min(pp):max(pp)
              ## maxint <- max(d[pprange])
              lwpos <- max(1, best.scale.pos - best.scale)
              rwpos <- min(best.scale.pos + best.scale, length(td))
              p1 <- match(td[lwpos], otd)[1]
              p2 <- match(td[rwpos], otd)
              p2 <- p2[length(p2)]
              if (is.na(p1)) p1 <- 1
              if (is.na(p2)) p2 <- N
              mz.value <- omz[p1:p2]
              mz.int <- od[p1:p2]
              maxint <- max(mz.int)
              
              ## re-calculate m/z value for peak range
              mzrange <- range(mz.value)
              #mzmean <- do.call(mzCenterFun,
              #                  list(mz = mz.value,
              #                       intensity = mz.int))
              mzmean <- weighted.mean(mz.value, mz.int)
              ## Compute dppm only if needed
              dppm <- NA
              if (param$verboseColumns) {
                if (length(mz.value) >= (minCentroids + 1)) {
                  dppm <- round(min(running(abs(diff(mz.value)) /
                                              (mzrange[2] *  1e-6),
                                            fun = max,
                                            width = minCentroids)))
                } else {
                  dppm <- round((mzrange[2] - mzrange[1]) /
                                  (mzrange[2] * 1e-6))
                }
              }
              peaks <- rbind(peaks,
                             c(mzmean,mzrange, ## mz
                               NA, NA, NA,     ## rt, rtmin, rtmax,
                               NA,             ## intensity (sum)
                               NA,             ## intensity (-bl)
                               maxint,         ## max intensity
                               round((maxint - baseline) / sdnoise),  ##  S/N Ratio
                               NA,             ## Gaussian RMSE
                               NA,NA,NA,       ## Gaussian Parameters
                               f,              ## ROI Position
                               dppm,           ## max. difference between the [minCentroids] peaks in ppm
                               best.scale,     ## Scale
                               td[best.scale.pos],
                               td[lwpos],
                               td[rwpos],  ## Peak positions guessed from the wavelet's (scan nr)
                               NA, NA))                    ## Peak limits (scan nr)
              peakinfo <- rbind(peakinfo,
                                c(best.scale, best.scale.nr,
                                  best.scale.pos, lwpos, rwpos))
              ## Peak positions guessed from the wavelet's
            }
          }
        }
      }  ##for
    } ## if
    ##  postprocessing
    if (!is.null(peaks)) {
      colnames(peaks) <- c(basenames, verbosenames)
      colnames(peakinfo) <- c("scale", "scaleNr", "scpos",
                              "scmin", "scmax")
      for (p in seq_len(dim(peaks)[1])) {
        ## find minima (peak boundaries), assign rt and intensity values
        if (param$integrate == 1) {
          lm <- descendMinR(wCoefs[, peakinfo[p, "scaleNr"]],
                           istart = peakinfo[p, "scpos"])
          gap <- all(d[lm[1]:lm[2]] == 0) # looks like we got stuck in a gap right in the middle of the peak
          if ((lm[1] == lm[2]) || gap)   # fall-back
            lm <- descendMinTol(
              d, startpos = c(peakinfo[p, "scmin"],
                              peakinfo[p, "scmax"]),
              maxDescOutlier)
        } else {
          lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"],
                                              peakinfo[p, "scmax"]),
                              maxDescOutlier)
        }
        ## Narrow peak rt boundaries by removing values below threshold
        lm <- .narrow_rt_boundaries(lm, d)
        lm_seq <- lm[1]:lm[2]
        pd <- d[lm_seq]
        
        peakrange <- td[lm]
        peaks[p, "rtmin"] <- scantime[peakrange[1]]
        peaks[p, "rtmax"] <- scantime[peakrange[2]]
        peaks[p, "maxo"] <- max(pd)
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        if (is.na(pwid))
          pwid <- 1
        peaks[p, "into"] <- pwid * sum(pd)
        db <- pd - baseline
        peaks[p, "intb"] <- pwid * sum(db[db > 0])
        peaks[p, "lmin"] <- lm[1]
        peaks[p, "lmax"] <- lm[2]
        
        if (param$fitgauss) {
          ## perform gaussian fits, use wavelets for inital parameters
          td_lm <- td[lm_seq]
          md <- max(pd)
          d1 <- pd / md ## normalize data for gaussian error calc.
          pgauss <- fitGauss(td_lm, pd,
                             pgauss = list(mu = peaks[p, "scpos"],
                                           sigma = peaks[p, "scmax"] -
                                             peaks[p, "scmin"],
                                           h = peaks[p, "maxo"]))
          
          
          rtime <- peaks[p, "scpos"]
          if (!any(is.na(pgauss)) && all(pgauss > 0)) {
            gtime <- td[match(round(pgauss$mu), td)]
            if (!is.na(gtime)) {
              rtime <- gtime
              peaks[p, "mu"] <- pgauss$mu
              peaks[p, "sigma"] <- pgauss$sigma
              peaks[p, "h"] <- pgauss$h
              peaks[p,"egauss"] <- sqrt(
                (1 / length(td_lm)) *
                  sum(((d1 - gauss(td_lm, pgauss$h / md,
                                   pgauss$mu, pgauss$sigma))^2)))
            }
          }
          peaks[p, "rt"] <- scantime[rtime]
          ## avoid fitting side effects
          if (peaks[p, "rt"] < peaks[p, "rtmin"])
            peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
        } else
          peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
      }
      peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime,
                                    scan.range, peaks, maxGaussOverlap)
    }
    if (!is.null(peaks)) {
      peaklist[[length(peaklist) + 1]] <- peaks
    }
  } ## f
  
  #save.image(file = paste0(length(roiList),"_",Sys.time(),".RData"));
  
  if (length(peaklist) == 0) {
    if(!.SwapEnv$.optimize_switch){
      warning("No peaks found!")
    }
    
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    
    if(.optimize_switch) {
      # do nothing
    } else {
      message(" FAIL: none found!")
    }
    return(nopeaks)
  }
  
  p <- do.call(rbind, peaklist)
  # save(p, file = paste0("p_",Sys.time(),".rda"))
  
  if (!param$verboseColumns)
    p <- p[, basenames, drop = FALSE]
  
  uorder <- order(p[, "into"], decreasing = TRUE)
  pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE])
  uindex <- rectUniqueR(pm, uorder, param$mzdiff, ydiff = -0.00001) ## allow adjacent peaks
  pr <- p[uindex, , drop = FALSE]
  
  
  if (.on.public.web() & !.optimize_switch){
    print_mes_tmp <- paste0(" OK: ", nrow(pr), " found.");    
    print_mes <- paste0(print_mes, print_mes_tmp)
    write.table(print_mes, file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
    
  } else if(.optimize_switch) {
    # do nothing
  } else {
    message(" OK: ", nrow(pr), " found.")
  }
  
  return(pr)
}

#' @title PeakPicking_Massifquant_slave
#' @description PeakPicking_Massifquant_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_Massifquant_slave <- function(x, param){
  
  # if(.on.public.web()){
  #   dyn.load(.getDynLoadPath());
  # }
  # load necessary C code for data processing
  
  if (is(x,"OnDiskMSnExp")){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- unname(MSnbase::rtime(x));
    
  } else if (is(x,"list")) { # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  mzs <- lapply(scan.set, MSnbase::mz);
  
  vals_per_spect <- lengths(mzs, FALSE);
  
  #if (any(vals_per_spect == 0))
  #  mzs <- mzs[-which(vals_per_spect == 0)];
  #  x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
  #  warning("Found empty spectra Scan. Filtered Automatically !")
  
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  valsPerSpect = vals_per_spect;
  scantime = rt;
  
  #------------Massifquant param ------------------/
  #mz,
  #int,
  #scantime,
  #valsPerSpect,
  ppm = param$ppm;
  peakwidth = param$peakwidth;
  snthresh = param$snthresh;
  prefilter = param$prefilter;
  mzCenterFun = "wMean";
  integrate = param$integrate;
  mzdiff = param$mzdiff;
  fitgauss = param$fitgauss;
  noise = param$noise;
  
  ##### Massifquant specific paramters----|
  verboseColumns = FALSE;
  criticalValue = param$criticalValue;
  consecMissedLimit = param$consecMissedLimit;
  unions = param$unions;
  checkBack = param$checkBack;
  withWave = param$withWave;
  ##### Massifquant specific paramters----|
  
  
  #------------Massifquant param ------------------/
  
  if (!is.double(mz))
    mz <- as.double(mz)
  if (!is.double(int))
    int <- as.double(int)
  ## Fix the mzCenterFun
  mzCenterFun <- paste("mzCenter",
                       gsub(mzCenterFun, pattern = "mzCenter.",
                            replacement = "", fixed = TRUE), sep=".")
  if(mzCenterFun == "mzCenter.wMean"){
    mzCenter.wMean <- weighted.mean;
  }
  
  if (!exists(mzCenterFun, mode="function"))
    stop(" >", mzCenterFun, "< not defined !")
  
  if(.on.public.web()){
    
    print_mes_tmp <- paste0("Detecting mass traces at ", ppm, "ppm ... ");    
    #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
    #print_mes <- paste0(print_mes,print_mes_tmp)
    
    .SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
    count_total_sample <- .SwapEnv$count_total_sample;
    write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    
  } else {
    message("Detecting  mass traces at ",ppm," ppm ... ", appendLF = FALSE)
  }
  
  
  flush.console()
  
  #massifquantROIs <- do_findKalmanROI(mz = mz, int = int, scantime = scantime,
  #                                    valsPerSpect = valsPerSpect,
  #                                    minIntensity = prefilter[2],
  #                                    minCentroids = peakwidth[1],
  #                                    criticalVal = criticalValue,
  #                                    consecMissedLim = consecMissedLimit,
  #                                    segs = unions, scanBack = checkBack,
  #                                    ppm = ppm)
  mzrange = c(0.0, 0.0);
  scanrange = c(1, length(scantime));
  minIntensity = prefilter[2];
  minCentroids = peakwidth[1];
  segs = unions;
  scanBack = checkBack;
  scanindex <- valueCount2ScanIndex(valsPerSpect)
  
  #scanindex <- valueCount2ScanIndex(valsPerSpect) 
  ## Call the C function.
  if (!is.integer(scanindex))
    scanindex <- as.integer(scanindex)
  
  if (!is.double(scantime))
    scantime <- as.double(scantime)

    tmp <- capture.output(
      massifquantROIs <- massifquantROIs(
        mz = mz, 
        int = int, 
        scanindex = scanindex, 
        scantime = scantime,
        mzrange = mzrange, 
        scanrange = scanrange,
        minIntensity = minIntensity,
        minCentroids = minCentroids, 
        consecMissedLim = consecMissedLimit,
        ppm = ppm, 
        criticalVal = criticalValue, 
        segs = segs,
        scanBack = scanBack))

  #if (withWave) {
  if (FALSE) {
    # featlist <- do_findChromPeaks_centWave(mz = mz, int = int,
    #                                        scantime = scantime,
    #                                        valsPerSpect = valsPerSpect,
    #                                        ppm = ppm, peakwidth = peakwidth,
    #                                        snthresh = snthresh,
    #                                        prefilter = prefilter,
    #                                        mzCenterFun = mzCenterFun,
    #                                        integrate = integrate,
    #                                        mzdiff = mzdiff,
    #                                        fitgauss = fitgauss,
    #                                        noise = noise,
    #                                        verboseColumns = verboseColumns,
    #                                        roiList = massifquantROIs)
  } else {
    ## Get index vector for C calls
    scanindex <- valueCount2ScanIndex(valsPerSpect);
    basenames <- c("mz","mzmin","mzmax","rtmin","rtmax","rt", "into");
    
    if (length(massifquantROIs) == 0) {
      if(!.SwapEnv$.optimize_switch){
        warning("\nNo peaks found!")
      }
      nopeaks <- matrix(nrow=0, ncol=length(basenames))
      colnames(nopeaks) <- basenames
      return(nopeaks)
    }
    
    ## Get the max intensity for each peak.
    maxo <- lapply(massifquantROIs, function(z) {
      raw <- .rawMat(mz = mz, int = int, scantime = scantime,
                     valsPerSpect = valsPerSpect,
                     mzrange = c(z$mzmin, z$mzmax),
                     scanrange = c(z$scmin, z$scmax))
      max(raw[, 3])
    })
    
    ## p <- t(sapply(massifquantROIs, unlist))
    p <- do.call(rbind, lapply(massifquantROIs, unlist, use.names = FALSE))
    colnames(p) <- basenames
    p <- cbind(p, maxo = unlist(maxo))
    
    #calculate median index
    p[, "rt"] <- as.integer(p[, "rtmin"] + ( (p[, "rt"] + 1) / 2 ) - 1)
    #convert from index into actual time
    p[, "rtmin"] <- scantime[p[, "rtmin"]]
    p[, "rtmax"] <- scantime[p[, "rtmax"]]
    p[, "rt"] <- scantime[p[, "rt"]]
    
    uorder <- order(p[, "into"], decreasing = TRUE)
    pm <- as.matrix(p[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                      drop = FALSE])
    uindex <- rectUniqueR(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks;
    featlist <- p[uindex, , drop = FALSE]
    #message(" ", dim(featlist)[1]," Peaks.");
    
    .optimize_switch <- .SwapEnv$.optimize_switch;
    if(is.null(.optimize_switch)){
      .optimize_switch <- FALSE;
    }
    
    if(.on.public.web() & !.optimize_switch){
      
      print_mes_tmp2 <- paste0(" OK: ", dim(featlist)[1], " Peaks found.");
      print_mes <- paste0(print_mes_tmp,print_mes_tmp2);
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
      
    } else {
      
      message("OK")
      
    }
    
    return(featlist)
  }
  
  return(featlist)
}

#' @title PeakPicking_MatchedFilter_slave
#' @description PeakPicking_MatchedFilter_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @importFrom stats deriv3
#' @importFrom stats fft
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_MatchedFilter_slave <- function(x,param){
  
  # if(.on.public.web()){
  #   dyn.load(.getDynLoadPath());
  # }
  
  if (is(x,"OnDiskMSnExp")){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    
  } else if (is(x, "list")){ # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  ## 1. data & Param preparation
  mzs <- lapply(scan.set, MSnbase::mz)
  valsPerSpect <- lengths(mzs, FALSE)
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  scantime = rt;
  mrange <- range(mz[mz > 0]);
  binSize <- param$peakBinSize;
  mass <- seq(floor(mrange[1] / binSize) * binSize,
              ceiling(mrange[2] / binSize) * binSize,
              by = binSize)
  impute <- param$impute;
  baseValue <- param$baseValue;
  distance <- param$distance;
  steps <- param$steps;
  sigma <- param$sigma;
  snthresh <-param$snthresh;
  max <- param$max;
  mzdiff <- param$mzdiff;
  index <- param$index;
  
  
  ## 2. Binning the data.
  ## Create and translate settings for binYonXR
  
  toIdx <- cumsum(valsPerSpect)
  fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
  shiftBy = TRUE
  
  binFromX <- min(mass)
  binToX <- max(mass)
  ## binSize <- (binToX - binFromX) / (length(mass) - 1)
  ## brks <- seq(binFromX - binSize/2, binToX + binSize/2, by = binSize)
  
  brks <- breaks_on_nBinsR(fromX = binFromX, toX = binToX,
                          nBins = length(mass), shiftByHalfBinSize = TRUE)
  
  binRes <- binYonXR(mz, int,
                    breaks = brks,
                    fromIdx = fromIdx,
                    toIdx = toIdx,
                    baseValue = ifelse(impute == "none", yes = 0, no = NA),
                    sortedX = TRUE,
                    returnIndex = TRUE
  )
  
  
  if (length(toIdx) == 1)
    binRes <- list(binRes)
  bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index)))
  bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1]
  ## Missing value imputation
  if (missing(baseValue))
    baseValue <- numeric()
  if (length(baseValue) == 0)
    baseValue <- min(int, na.rm = TRUE) / 2
  if (missing(distance))
    distance <- numeric()
  if (length(distance) == 0)
    distance <- floor(0.075 / bin_size)
  
  binVals <- lapply(binRes, function(z) {
    return(imputeLinInterpol(z$y, method = impute, distance = distance,
                             noInterpolAtEnds = TRUE,
                             baseValue = baseValue))
  })
  
  buf <- do.call(cbind, binVals)
  
  bufidx <- 1L:length(mass)
  lookahead <- steps-1
  lookbehind <- 1
  
  N <- nextn(length(scantime))
  xrange <- range(scantime)
  x <- c(0:(N/2), -(ceiling(N/2-1)):-1) *
    (xrange[2]-xrange[1])/(length(scantime)-1)
  
  filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi)) *
                              exp(-x^2/(2*sigma^2)), "x")), "hessian")
  filt <- filt/sqrt(sum(filt^2))
  filt <- fft(filt, inverse = TRUE)/length(filt)
  
  cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf",
              "maxo", "maxf", "i", "sn")
  num <- 0
  ResList <- list()
  
  ## Can not do much here, lapply/apply won't work because of the 'steps' parameter.
  ## That's looping through the masses, i.e. rows of the profile matrix.
  for (i in seq(length = length(mass)-steps+1)) {
    ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE]
    ysums <- colMaxR(ymat)
    yfilt <- filtfft(ysums, filt)
    gmax <- max(yfilt)
    for (j in seq(length = max)) {
      maxy <- which.max(yfilt)
      noise <- mean(ysums[ysums > 0])
      ##noise <- mean(yfilt[yfilt >= 0])
      sn <- yfilt[maxy]/noise
      if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) {
        peakrange <- DescendZeroR(yfilt, maxy)
        intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE]
        mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)],
                                  peakrange[1]:peakrange[2]]],
                        nrow = steps)
        which.intMax <- which.colMax(intmat)
        mzmat <- mzmat[which.intMax]
        if (all(is.na(mzmat))) {
          yfilt[peakrange[1]:peakrange[2]] <- 0
          next
        }
        mzrange <- range(mzmat, na.rm = TRUE)
        massmean <- weighted.mean(mzmat, intmat[which.intMax],
                                  na.rm = TRUE)
        ## This case (the only non-na m/z had intensity 0) was reported
        ## by Gregory Alan Barding "binlin processing"
        if(any(is.na(massmean))) {
          massmean <- mean(mzmat, na.rm = TRUE)
        }
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        into <- pwid*sum(ysums[peakrange[1]:peakrange[2]])
        intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]])
        maxo <- max(ysums[peakrange[1]:peakrange[2]])
        maxf <- yfilt[maxy]
        
        yfilt[peakrange[1]:peakrange[2]] <- 0
        num <- num + 1
        ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy,
                            peakrange, into, intf, maxo, maxf, j, sn)
      } else
        break
    }
  }
  
  
  if (length(ResList) == 0) {
    rmat <- matrix(nrow = 0, ncol = length(cnames))
    colnames(rmat) <- cnames
    return(rmat)
  }
  rmat <- do.call(rbind, ResList)
  if (is.null(dim(rmat))) {
    rmat <- matrix(rmat, nrow = 1)
  }
  colnames(rmat) <- cnames
  max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize)
  if (index){
    mzdiff <- mzdiff/binSize
  }  else {
    rmat[, "rt"] <- scantime[rmat[, "rt"]]
    rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]]
    rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]]
  }
  ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak
  ## and report that.
  uorder <- order(rmat[, "into"], decreasing = TRUE)
  uindex <- rectUniqueR(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                            drop = FALSE],
                       uorder, mzdiff)
  rmat <- rmat[uindex,,drop = FALSE]
  
  return(rmat)
  
}

#' @title PerformPeakGrouping
#' @description PerformPeakGrouping
#' @param mSet mSet object.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
PerformPeakGrouping<-function(mSet){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 200; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }

  param <- mSet@params;
  
  ## 1. Verify & Extract Information-------
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  if(length(mSet@peakRTcorrection)==0){
    
    if(length(mSet@peakpicking) == 0){
      MessageOutput("ERROR: No Chromatographic peaks detected, please \"PerformPeakPicking\" first !")
    } else {
      peaks_0 <- mSet@peakpicking$chromPeaks;
    }
    
  } else {
    peaks_0 <- mSet@peakRTcorrection$chromPeaks;
  }
  
  peaks <- OptiChromPeaks(peaks_0);
  
  peaks <- cbind(peaks_0[, c("mz", "rt", "sample", "into"), drop = FALSE],
                 index = seq_len(nrow(peaks_0)))
  
  if (length(mSet@rawOnDisk) != 0) {
    
    sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_group"]]);
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
    }
    
  } else if (length(mSet@rawInMemory) != 0) {
    
    sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_group"]]);   
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    }
    
  } else {
    stop("Exceptions occurs during data import process. Please check your data carefully!")
  }
  
  sampleGroupNames <- unique(sample_group)
  sampleGroupTable <- table(sample_group)
  nSampleGroups <- length(sampleGroupTable)
  
  # if BLANK Substraction has already been performed, filter out all blank features here
  if(!is.null(mSet@peakRTcorrection[["BlankSubstracted"]])){
    if(mSet@peakRTcorrection[["BlankSubstracted"]]){
      peaks <- peaks[peaks[,"sample"] != 0,];
      sample_group <- sample_group[sample_group != "BLANK"]
      sampleGroupNames <- sampleGroupNames[sampleGroupNames != "BLANK"]
      sampleGroupTable <- sampleGroupTable[-which(names(sampleGroupTable) == "BLANK")]
      nSampleGroups <- length(sampleGroupTable)
    }
  }
  
  ## 2. Order peaks matrix by mz-------
  peaks <- peaks[order(peaks[, "mz"]), , drop = FALSE]
  rownames(peaks) <- NULL
  rtRange <- range(peaks[, "rt"])
  
  ## 3. Define the mass slices and the index in the peaks matrix with an mz-------
  mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + param$binSize,
              by = param$binSize / 2)
  masspos <- findEqualGreaterMR(peaks[, "mz"], mass)
  
  densFrom <- rtRange[1] - 3 * param$bw
  densTo <- rtRange[2] + 3 * param$bw
  
  ## 4. Increase the number of sampling points for the density distribution.-------
  densN <- max(512, 2 * 2^(ceiling(log2(diff(rtRange) / (param$bw / 2)))))
  endIdx <- 0
  
  if (!.optimize_switch){
    print_mes <- paste0("Total of ", length(mass) - 1, " slices detected for processing... ");
  }
  
  resL <- vector("list", (length(mass) - 2))
  
  for (i in seq_len(length(mass)-2)) {
    ## That's identifying overlapping mz slices.
    startIdx <- masspos[i]
    endIdx <- masspos[i + 2] - 1
    if (endIdx - startIdx < 0)
      next
    resL[[i]] <- Densitygrouping_slave(peaks[startIdx:endIdx, , drop = FALSE],
                                       bw = param$bw, densFrom = densFrom,
                                       densTo = densTo, densN = densN,
                                       sampleGroups = sample_group,
                                       sampleGroupTable = sampleGroupTable,
                                       minFraction = param$minFraction,
                                       minSamples = param$minSamples,
                                       maxFeatures = param$maxFeatures)
  }
  
  if (!.optimize_switch){
    print_mes_tmp <- paste("Done ! \nGoing to the next step...");
    MessageOutput(mes = paste0(print_mes,print_mes_tmp), ecol = "\n", NULL)
  }
  
  res <- do.call(rbind, resL)
  
  if (nrow(res)) {
    ## Remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(
      as.matrix(res[, (match("npeaks", colnames(res)) +1):(ncol(res) -1),
                    drop = FALSE]))
    uorder <- order(-numsamp, res[, "npeaks"])
    
    uindex <- rectUniqueR(
      as.matrix(res[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                    drop = FALSE]), uorder)
    res <- res[uindex, , drop = FALSE]
    rownames(res) <- NULL
  }
  
  df <- S4Vectors::DataFrame(res)
  rownames(df) <- sprintf(paste0("FT", "%0", ceiling(log10(nrow(df) + 1L)), "d"),
                          seq(from = 1, length.out = nrow(df)))
  
  FeatureGroupTable <- df;
  n <- length(mSet@peakgrouping) + 1;
  mSet@peakgrouping[[n]] <- FeatureGroupTable;

  return(mSet)
}

#' @title Densitygrouping_slave
#' @description Densitygrouping_slave
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
Densitygrouping_slave <- function(x, bw, densFrom, densTo, densN, sampleGroups,
                                  sampleGroupTable, minFraction,
                                  minSamples, maxFeatures) {
  den <- density(x[, "rt"], bw = bw, from = densFrom, to = densTo,
                 n = densN)
  maxden <- max(den$y)
  deny <- den$y
  sampleGroupNames <- names(sampleGroupTable)
  nSampleGroups <- length(sampleGroupNames)
  col_nms <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
               "npeaks", sampleGroupNames)
  res_mat <- matrix(nrow = 0, ncol = length(col_nms),
                    dimnames = list(character(), col_nms))
  res_idx <- list()
  while (deny[maxy <- which.max(deny)] > maxden / 20 && nrow(res_mat) <
         maxFeatures) {
    grange <- descendMinR(deny, maxy)
    deny[grange[1]:grange[2]] <- 0
    gidx <- which(x[,"rt"] >= den$x[grange[1]] &
                    x[,"rt"] <= den$x[grange[2]])
    ## Determine the sample group of the samples in which the peaks
    ## were detected and check if they correspond to the required limits.
    tt <- table(sampleGroups[unique(x[gidx, "sample"])])
    if (!any(tt / sampleGroupTable[names(tt)] >= minFraction &
             tt >= minSamples))
      next
    gcount <- rep(0, length(sampleGroupNames))
    names(gcount) <- sampleGroupNames
    gcount[names(tt)] <- as.numeric(tt)
    res_mat <- rbind(res_mat,
                     c(median(x[gidx, "mz"]),
                       range(x[gidx, "mz"]),
                       median(x[gidx, "rt"]),
                       range(x[gidx, "rt"]),
                       length(gidx),
                       gcount)
    )
    res_idx <- c(res_idx, list(unname(sort(x[gidx, "index"]))))
  }
  
  res <- as.data.frame(res_mat)
  res$peakidx <- res_idx
  res
}

#' @title PerformPeakAlignment
#' @description PerformPeakAlignment
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @export
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @return will return an mSet object with peak aligned done
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' mSet <- PerformPeakAlignment(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakAlignment<-function(mSet){
  ## 1~4. Peak Grouping ---------
  mSet<-PerformPeakGrouping(mSet);
  
  ## Insert blank substraction here
  mSet <- PerformBlankSubstraction(mSet);
  
  ## 5. Perform RT correction ---------
  mSet<-PerformRTcorrection(mSet)
  
  ## 6~9. Peak Grouping Again ---------
  mSet<-PerformPeakGrouping(mSet)
  
  ## 10. Return ------
  return(mSet)
}

PerformRTcorrection <- function(mSet){
  
  ## 5. Adjust Retention Time-------
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }

  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  #TODO: add a function to verify the param in mSet
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  if(length(mSet@peakgrouping) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No grouped peaks found, please \"PerformPeakGrouping\" first !", NULL, NULL)
    } else {
      stop("No grouped peaks found, please \"PerformPeakGrouping\" first !")
    }
  }
  
  param <- mSet@params;
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if (.on.public.web() & !.optimize_switch){
    MessageOutput(mes = paste("Retention time correction is running."),
                  ecol = "\n",
                  progress = 60)
  }
  
  if (param[["RT_method"]] == "obiwarp"){
    if (!.optimize_switch){
      MessageOutput(mes = paste("obiwarp is used for retention time correction. \n"),
                    ecol = "",
                    progress = NULL)
    }
    
    mSet <- tryCatch(adjustRtime_obiwarp(mSet, param, msLevel = 1L), error = function(e){e});
  } else {
    if (!.optimize_switch){
      MessageOutput(mes = paste("PeakGroup -- loess is used for retention time correction."),
                    ecol = "",
                    progress = NULL)
    }
    
    mSet <- tryCatch(adjustRtime_peakGroup(mSet, param, msLevel = 1L), error = function(e){e});
  }
  
  if (is(mSet,"simpleError") & !.optimize_switch){
   
    MessageOutput(
      mes = paste0("<font color=\"red\">","\nERROR:",mSet$message,"</font>"),
      ecol = "\n",
      progress = NULL
    )
    stop("EXCEPTION POINT CODE: RT1");
  }
  
  #
  if (!.optimize_switch){
    MessageOutput(
      mes = "Done !",
      ecol = "\n",
      progress = 88
    )
  }
  # 
  # if(.on.public.web()){
  #   save(mSet, file = "mSet.rda");
  # }
  # 
  return(mSet);
}

adjustRtime_peakGroup <- function(mSet, param, msLevel = 1L) {
  
  # 1). Group Matrix-------
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  # 1.1). Subset selection
  
  if (length(mSet@rawOnDisk) != 0){
    if(param$BlankSub){
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      subs <- subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
      specdata <- mSet@rawOnDisk;
      Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
    } else {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      specdata <- mSet@rawOnDisk;
      Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
    }
   
    if (length(Subset_QC) < 30){
      Subset <- integer(0);
    } else {
      Subset <- Subset_QC;
    }
    
    peaks_0 <- mSet@peakpicking$chromPeaks;
    
  } else {
    subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    specdata <- mSet@rawInMemory;
    Subset_QC <- which(mSet@rawInMemory@phenoData@data[["sample_group"]]=="QC");
    # files_list <- mSet[[format]]@processingData@files;
    
    if (length(Subset_QC) < 30){
      Subset <- integer(0);
    } else {
      Subset <- Subset_QC;
    }
    
    peaks_0 <- mSet@peakpicking$chromPeaks;
  }
  
  if(!identical(Subset, integer(0))){
    subs <- Subset;
  }
  
  nSamples <- length(subs)
  
  subset_names <- original_names <- mSet@peakpicking$chromPeakData@rownames;
  n <- length(mSet@peakgrouping);
  pidx <- mSet@peakgrouping[[n]]$peakidx; # RT correction based on latest grouping results
  
  mSet@peakgrouping[[n]]$peakidx <- lapply(pidx, function(z) {
    idx <- base::match(original_names[z], subset_names)
    idx[!is.na(idx)]
  })
  peakIndex_0 <- mSet@peakgrouping[[n]][lengths(mSet@peakgrouping[[n]]$peakidx) > 0, ]
  peakIndex <- .peakIndex(peakIndex_0)
  
  
  pkGrpMat <- .getPeakGroupsRtMatrix(peaks = peaks_0,
                                     peakIndex = peakIndex,
                                     sampleIndex = subs,
                                     missingSample = nSamples - (nSamples * param$minFraction),
                                     extraPeaks = param$extraPeaks
  )
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if (is.null(pkGrpMat) & !.optimize_switch){
    MessageOutput(mes = paste0("<font color=\"red\">","\nERROR:","No enough peaks detected, please adjust your parameters or use other Peak/Alignment method","</font>"),
                  ecol = "\n",
                  progress = 65)
  }
  colnames(pkGrpMat) <- specdata@phenoData@data[["sample_name"]][subs]
  
  if (!.optimize_switch){
    MessageOutput(mes =paste("...."), "\n", 65)
  }
  
  # 2). RT adjustment-------
  if(param$BlankSub){
    rtime_0 <- rtime(specdata)
    
    rtime0 <- split(rtime_0, fromFile(specdata))
    tmp <- rtime0[subs]
    rtime <- vector("list", length(fileNames(specdata)))
    names(rtime) <- as.character(seq_along(rtime))
    rtime[as.numeric(names(tmp))] <- tmp
    rtime <- rtime[subs]
    names(rtime) <- seq_along(rtime)
    
    peaks_1 <- peaks_0;
    peaks_1[!(peaks_1[, "sample"] %in% subs), "sample"] <- 0;
    #### "sample" == 0 means this samples are considered as blank and will be excluded for future!
    j <- 1;
    for(jj in unique(peaks_1[,"sample"])){
      if(jj == 0) next;
      peaks_1[peaks_1[,"sample"] == jj, "sample"] <- j;
      j <- j + 1
    }
  } else {
    rtime_0 <- rtime(specdata)
    
    tmp <- split(rtime_0, fromFile(specdata));
    rtime <- vector("list", length(fileNames(specdata)));
    names(rtime) <- as.character(seq_along(rtime));
    rtime[as.numeric(names(tmp))] <- tmp;

    peaks_1 <- peaks_0;
  }

  
  res <- RT.Adjust_peakGroup(peaks_1,
                             peakIndex = peakIndex_0$peakidx,
                             rtime = rtime,
                             minFraction = param$minFraction,
                             extraPeaks = param$extraPeaks,
                             smooth = param$smooth,
                             span = param$span,
                             family = param$family,
                             peakGroupsMatrix = pkGrpMat,
                             subset = Subset,
                             subsetAdjust = param$subsetAdjust)

  
  if (!.optimize_switch){
    MessageOutput(mes ="Applying retention time adjustment to the identified chromatographic peaks ... ",
                  "", 
                  66)
  }
  
  if(param$BlankSub){
    #names(rtime) <- names(res) <- subs;
    #rtime0[subs] <- res;
    
    #ChromPeaks <- mSet@peakpicking$chromPeaks;
    #ChromPeaks <- ChromPeaks[ChromPeaks[,"sample"] %in% subs,]
    ChromPeaks <- peaks_1;
    fts <- .applyRtAdjToChromPeaks(ChromPeaks,
                                   rtraw = rtime,
                                   rtadj = res);
    fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],]
    mSet@peakRTcorrection$BlankSubstracted <- TRUE;
  } else {
    fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
                                   rtraw = rtime,
                                   rtadj = res)
    mSet@peakRTcorrection$BlankSubstracted <- FALSE;
  }
  
  mSet@peakRTcorrection[["pkGrpMat_Raw"]] <- pkGrpMat;
  mSet@peakRTcorrection[["chromPeaks"]] <- fts;
  mSet@peakRTcorrection[["adjustedRT"]] <- res;
  
  return(mSet);
}

#' @importFrom stats approxfun
adjustRtime_obiwarp <- function(mSet, param, msLevel = 1L) {
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  ## Filter for MS level, perform adjustment and if the object
  ## contains spectra from other MS levels too, adjust all raw
  ## rts based on the difference between adjusted and raw rts.
  
  if (length(mSet@rawOnDisk) != 0){
    if (param$BlankSub) {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      subs <-
        subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
    } else {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
    }
    specdata <- mSet@rawOnDisk;
    
    object_sub <- MSnbase::filterMsLevel(
      MSnbase::selectFeatureData(specdata,
                                 fcol = c(.MSnExpReqFvarLabels,
                                          "centroided")), msLevel. = 1);

  } else {
    subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    specdata <- mSet@rawInMemory;
    object_sub <- MSnbase::filterMsLevel(specdata, msLevel. = 1); # No need to filter!
  }
  
  #object <- mSet@rawOnDisk;

  if (length(object_sub) == 0)
    stop("No spectra of MS level ", msLevel, " present")
  
  res <- mSet.obiwarp(mSet, object_sub, param = param)
  
  ## Adjust the retention time for spectra of all MS levels, if
  ## if there are some other than msLevel (issue #214).
  if (length(unique(msLevel(specdata))) !=
      length(unique(msLevel(object_sub)))) {
    
    if (!.optimize_switch){
      MessageOutput(
        mes = paste0("Apply retention time correction performed on MS",
                     msLevel, " to spectra from all MS levels - obiwarp"),
        ecol = "",
        progress = 66
      )
    } 

    ## I need raw and adjusted rt for the adjusted spectra
    ## and the raw rt of all.
    rtime_all <- split(rtime(specdata), fromFile(specdata))
    rtime_sub <- split(rtime(object_sub), fromFile(object_sub))
    ## For loop is faster than lapply. No sense to do parallel
    for (i in seq_along(rtime_all)) {
      n_vals <- length(rtime_sub[[i]])
      idx_below <- which(rtime_all[[i]] < rtime_sub[[i]][1])
      if (length(idx_below))
        vals_below <- rtime_all[[i]][idx_below]
      idx_above <- which(rtime_all[[i]] >
                           rtime_sub[[i]][n_vals])
      if (length(idx_above))
        vals_above <- rtime_all[[i]][idx_above]
      ## Adjust the retention time. Note: this should be
      ## OK even if values are not sorted.
      adj_fun <- approxfun(x = rtime_sub[[i]], y = res[[i]])
      rtime_all[[i]] <- adj_fun(rtime_all[[i]])
      ## Adjust rtime < smallest adjusted rtime.
      if (length(idx_below)) {
        rtime_all[[i]][idx_below] <- vals_below +
          res[[i]][1] - rtime_sub[[i]][1]
      }
      ## Adjust rtime > largest adjusted rtime
      if (length(idx_above)) {
        rtime_all[[i]][idx_above] <- vals_above +
          res[[i]][n_vals] - rtime_sub[[i]][n_vals]
      }
    }
    res <- rtime_all
  }
  res <- unlist(res, use.names = FALSE)
  sNames <- unlist(split(featureNames(specdata), fromFile(specdata)),
                   use.names = FALSE)
  names(res) <- sNames
  res <- res[featureNames(specdata)]
  #res;
  
  adjustedRtime_res <- unname(split(res, fromFile(specdata)));
  #mSet[["msFeatureData"]][["adjustedRT"]] <- adjustedRtime_res;
  
  rtime_0 <- rtime(specdata)
  tmp <- split(rtime_0, fromFile(specdata))
  rtime <- vector("list", length(fileNames(specdata)))
  names(rtime) <- as.character(seq_along(rtime))
  rtime[as.numeric(names(tmp))] <- tmp
  
  fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
                                 rtraw = rtime,
                                 rtadj = adjustedRtime_res);
  
  if(param$BlankSub){
    fts[!(fts[, "sample"] %in% subs), "sample"] <- 0;
    #### "sample" == 0 means this samples are considered as blank and will be excluded for future!
    j <- 1;
    for(jj in unique(fts[,"sample"])){
      if(jj == 0) next;
      fts[fts[,"sample"] == jj, "sample"] <- j;
      j <- j + 1
    }
    fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],];
    adjustedRtime_res <- adjustedRtime_res[subs];
    mSet@peakRTcorrection$BlankSubstracted <- TRUE;
  } else {
    mSet@peakRTcorrection$BlankSubstracted <- FALSE;
  }
  
  mSet@peakRTcorrection[["chromPeaks"]] <- fts;
  mSet@peakRTcorrection[["adjustedRT"]] <- adjustedRtime_res;
  
  return(mSet);
}

#' @title RT.Adjust_peakGroup
#' @description RT.Adjust_peakGroup
#' @param peaks peaks
#' @param peakIndex peakIndex
#' @param rtime rtime
#' @param minFraction minFraction
#' @param extraPeaks extraPeaks
#' @param smooth smooth
#' @param span span
#' @param family family
#' @param peakGroupsMatrix peakGroupsMatrix
#' @param subset subset
#' @param subsetAdjust subsetAdjust
#' @importFrom stats approx
#' @importFrom stats lsfit
#' @importFrom stats loess
#' @importFrom stats predict
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
RT.Adjust_peakGroup <-
  function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1,
           smooth = c("loess", "linear"), span = 0.2,
           family = c("gaussian", "symmetric"),
           peakGroupsMatrix = matrix(ncol = 0, nrow = 0),
           subset = integer(),
           subsetAdjust = c("average", "previous"))  {
   
    if(!exists(".SwapEnv")){
      .SwapEnv <<- new.env(parent = .GlobalEnv);
      .SwapEnv$.optimize_switch <- FALSE;
      .SwapEnv$count_current_sample <- 0;
      .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
      .SwapEnv$envir <- new.env();
    }
    
    .optimize_switch <- .SwapEnv$.optimize_switch;
    if(is.null(.optimize_switch)){
      .optimize_switch <- FALSE;
    }
    
    subsetAdjust <- match.arg(subsetAdjust)
    ## Check input.
    if (missing(peaks) | missing(peakIndex) | missing(rtime))
      stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!")
    smooth <- match.arg(smooth)
    family <- match.arg(family)
    ## minFraction
    if (any(minFraction > 1) | any(minFraction < 0))
      stop("'minFraction' has to be between 0 and 1!")
    ## Check peakIndex:
    if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks)))))
      stop("Some indices listed in 'peakIndex' are outside of ",
           "1:nrow(peaks)!")
    ## Check rtime:
    if (!is.list(rtime))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE)))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (length(rtime) != max(peaks[, "sample"]))
      stop("The length of 'rtime' does not match with the total number of ",
           "samples according to the 'peaks' matrix!")
    total_samples <- length(rtime)
    
    if (length(subset)) {
      if (!is.numeric(subset))
        stop("If provided, 'subset' is expected to be an integer")
      if (!all(subset %in% seq_len(total_samples)))
        stop("One or more indices in 'subset' are out of range.")
      if (length(subset) < 2)
        stop("Length of 'subset' too small: minimum required samples for ",
             "alignment is 2.")
    } else {
      subset <- seq_len(total_samples)
    }
    
    ## Translate minFraction to number of allowed missing samples.
    nSamples <- length(subset)
    missingSample <- nSamples - (nSamples * minFraction)
    ## Remove peaks not present in "subset" from the peakIndex
    peaks_in_subset <- which(peaks[, "sample"] %in% subset)
    peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset])
    ## Check if we've got a valid peakGroupsMatrix
    ## o Same number of samples.
    ## o range of rt values is within the rtime.
    if (nrow(peakGroupsMatrix)) {
      if (ncol(peakGroupsMatrix) != nSamples)
        stop("'peakGroupsMatrix' has to have the same number of columns ",
             "as there are samples!")
      pg_range <- range(peakGroupsMatrix, na.rm = TRUE)
      rt_range <- range(rtime)
      if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2]))
        stop("The retention times in 'peakGroupsMatrix' have to be within",
             " the retention time range of the experiment!")
      rt <- peakGroupsMatrix
    } else
      rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset,
                                   missingSample, extraPeaks)
    if (ncol(rt) != length(subset))
      stop("Length of 'subset' and number of columns of the peak group ",
           "matrix do not match.")
   
    if (length(rt) == 0)
      stop("No peak groups found in the data for the provided settings")
    
    if (.on.public.web() & !.optimize_switch){
      
      print_mes <- paste("Performing retention time correction using ", nrow(rt)," peak groups.");    
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
      
    } else if(.optimize_switch) {
      #Do nothing
    } else {
      message("Performing retention time correction using ", nrow(rt)," peak groups.")
    }
    
    ## Calculate the deviation of each peak group in each sample from its
    ## median
    rtdev <- rt - apply(rt, 1, median, na.rm = TRUE)
    
    if (smooth == "loess") {
      mingroups <- min(colSums(!is.na(rt)))
      if (mingroups < 4) {
        smooth <- "linear"
        if(!.SwapEnv$.optimize_switch){
        warning("Too few peak groups for 'loess', reverting to linear",
                " method")
        }
      } else if (mingroups * span < 4) {
        span <- 4 / mingroups
        if(!.SwapEnv$.optimize_switch){
        warning("Span too small for 'loess' and the available number of ",
                "peak groups, resetting to ", round(span, 2))
        }
      }
    }
    
    rtdevsmo <- vector("list", nSamples)
    
    ## Code for checking to see if retention time correction is overcorrecting
    rtdevrange <- range(rtdev, na.rm = TRUE)
    warn.overcorrect <- FALSE
    warn.tweak.rt <- FALSE
    
    rtime_adj <- rtime
    ## Adjust samples in subset.
    for (i in seq_along(subset)) {
      i_all <- subset[i]              # Index of sample in whole dataset.
      pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i]))
      
      ## order the data.frame such that rt and rtdev are increasingly ordered.
      pk_idx <- order(pts$rt, pts$rtdev)
      pts <- pts[pk_idx, ]
      if (smooth == "loess") {
        lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span,
                                     degree = 1, family = family))
        
        rtdevsmo[[i]] <- na.flatfill(
          predict(lo, data.frame(rt = rtime[[i_all]])))
        ## Remove singularities from the loess function
        rtdevsmo[[i]][abs(rtdevsmo[[i]]) >
                        quantile(abs(rtdevsmo[[i]]), 0.9,
                                 na.rm = TRUE) * 2] <- NA
        if (length(naidx <- which(is.na(rtdevsmo[[i]]))))
          rtdevsmo[[i]][naidx] <- suppressWarnings(
            approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])),
                   xout = rtime[[i_all]][naidx], rule = 2)$y
          )
        
        ## Check if there are adjusted retention times that are not ordered
        ## increasingly. If there are, search for each first unordered rt
        ## the next rt that is larger and linearly interpolate the values
        ## in between (see issue #146 for an illustration).
        while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) {
          warn.tweak.rt <- TRUE  ## Warn that we had to tweak the rts.
          rtadj <- rtime[[i_all]] - rtdevsmo[[i]]
          rtadj_start <- rtadj[decidx[1]] ## start interpolating from here
          ## Define the
          next_larger <- which(rtadj > rtadj[decidx[1]])
          if (length(next_larger) == 0) {
            ## Fix if there is no larger adjusted rt up to the end.
            next_larger <- length(rtadj) + 1
            rtadj_end <- rtadj_start
          } else {
            next_larger <- min(next_larger)
            rtadj_end <- rtadj[next_larger]
          }
          ## linearly interpolate the values in between.
          adj_idxs <- (decidx[1] + 1):(next_larger - 1)
          incr <- (rtadj_end - rtadj_start) / length(adj_idxs)
          rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] -
            (rtadj_start + (seq_along(adj_idxs)) * incr)
        }
        
        rtdevsmorange <- range(rtdevsmo[[i]]);
        
        divRes <- unique((rtdevsmorange / rtdevrange > 2));
        if(!is.na(divRes)[1]){
          if (any(divRes))
            warn.overcorrect <- TRUE
        }
        
      } else {
        if (nrow(pts) < 2 & !.optimize_switch) {
          stop("No enough peak groups even for linear smoothing available! Please check your data quality or decrease \"bw\" util to 1.")
        }
        ## Use lm instead?
        fit <- suppressWarnings(lsfit(pts$rt, pts$rtdev));
        rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1];
        ptsrange <- range(pts$rt);
        minidx <- rtime[[i_all]] < ptsrange[1];
        maxidx <- rtime[[i_all]] > ptsrange[2];
        rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)];
        rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)];
      }
      ## Finally applying the correction
      rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]]
    }
    
    ## Adjust the remaining samples.
    rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset,
                                   method = subsetAdjust)
    
    if (warn.overcorrect & !.SwapEnv$.optimize_switch) {
      warning("Fitted retention time deviation curves exceed points by more",
              " than 2x. This is dangerous and the algorithm is probably ",
              "overcorrecting your data. Consider increasing the span ",
              "parameter or switching to the linear smoothing method.")
    }
    
    if (warn.tweak.rt & !.SwapEnv$.optimize_switch) {
      warning(call. = FALSE, "Adjusted retention times had to be ",
                "re-adjusted for some files to ensure them being in the same",
                " order than the raw retention times. A call to ",
                "'dropAdjustedRtime' might thus fail to restore retention ",
                "times of chromatographic peaks to their original values. ",
                "Eventually consider to increase the value of the 'span' ",
                "parameter.")
    }
    rtime_adj
  }

mSet.obiwarp <- function(mSet, object, param) { ## Do not use the params defined by user for now!
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  param <- list();
  
  param$binSize <- 1;
  param$response <- 1;
  param$distFun <- "cor_opt";
  param$gapInit <- 0.3;
  param$gapExtend <- 2.4;
  param$factorDiag <- 2;
  param$factorGap <- 1;
  param$localAlignment <- FALSE;
  param$initPenalty <- 0;
  
  # 1.1). Subset selection
  
  Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");

  if (length(Subset_QC) < 30){
    Subset <- integer(0);
  } else {
    Subset <- Subset_QC;
  }
  
  subs <- Subset;
  
  if (!length(subs))
    subs <- seq_along(fileNames(object))
  
  total_samples <- length(fileNames(object))
  nSamples <- length(subs)
  
  if (nSamples <= 1)
    stop("Can not perform a retention time correction on less than two",
         " files.")
  
  centerSample <- floor(median(seq_len(nSamples)));
  
  if (.on.public.web() & !.optimize_switch){
    
    print_mes <- paste0("Sample number ", centerSample, " used as center sample.\n");    
    write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
    #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
    
  } else {
    MessageOutput(paste0("Sample number ", centerSample, " used as center sample.\n"), "", NULL)
  } 

  rtraw <- split(rtime(object), fromFile(object))
  object <- filterFile(object, file = subs)
  ## Get the profile matrix of the center sample:
  ## Using the (hidden) parameter returnBreaks to return also the breaks of
  ## the bins of the profile matrix. I can use them to align the matrices
  ## later.
  ## NOTE: it might be event better to just re-use the breaks from the center
  ## sample for the profile matrix generation of all following samples.
  suppressMessages(
    profCtr <- profMat(object, method = "bin",
                       step = param$binSize,
                       fileIndex = centerSample,
                       returnBreaks = TRUE)[[1]]
  )
  ## Now split the object by file
  objL <- splitByFile(object, f = factor(seq_len(nSamples)))
  objL <- objL[-centerSample]
  centerObject <- filterFile(object, file = centerSample)
  ## Now we can bplapply here!
  res <- lapply(objL, function(z, cntr, cntrPr, parms) {
    
    message("Aligning ", basename(fileNames(z)), " against ",
            basename(fileNames(cntr)), " ... ", appendLF = TRUE)
    
    ## Get the profile matrix for the current file.
    suppressMessages(
      curP <- profMat(z, method = "bin", step = parms$binSize,
                      returnBreaks = TRUE)[[1]]
    )
    
    ## ---------------------------------------
    ## 1)Check the scan times of both objects:
    scantime1 <- unname(rtime(cntr))
    scantime2 <- unname(rtime(z))
    ## median difference between spectras' scan time.
    mstdiff <- median(c(diff(scantime1), diff(scantime2)))

    mst1 <- which(diff(scantime1) > 5 * mstdiff)[1]
    if (!is.na(mst1)) {
      scantime1 <- scantime1[seq_len((mst1 - 1))]

      if (!.optimize_switch){
        MessageOutput(paste0("Found gaps in scan times of the center sample: cut ", "scantime-vector at ", scantime1[mst1]," seconds."),
                      "", NULL)
      } 

      if (.on.public.web() & !.optimize_switch){
        
        print_mes <- paste0("Found gaps in scan times of the center sample: cut ", "scantime-vector at ", scantime1[mst1]," seconds.");    
        write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
        #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
        
      } else {
        
        message("Found gaps in scan times of the center sample: cut ",
                "scantime-vector at ", scantime1[mst1]," seconds.")
      }
      
    }
    
    mst2 <- which(diff(scantime2) > 5 * mstdiff)[1]
    
    if(!is.na(mst2)) {
      scantime2 <- scantime2[seq_len((mst2 - 1))]

      if (!.optimize_switch){
        MessageOutput(paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", scantime2[mst2]," seconds."),
                      "", NULL)
      } 

      if (.on.public.web() & !.optimize_switch){
        
        print_mes <- paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", scantime2[mst2]," seconds.");    
        write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
        #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
        
      } else {
        
        message("Found gaps in scan time of file ", basename(fileNames(z)),
                ": cut scantime-vector at ", scantime2[mst2]," seconds.")
      }

    }
    ## Drift of measured scan times - expected to be largest at the end.
    rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)],
                            scantime2[length(scantime2)])))
    ## If the drift is larger than the threshold, cut the matrix up to the
    ## max allowed difference.
    if (rtmaxdiff > (5 * mstdiff)) {
      rtmax <- min(scantime1[length(scantime1)],
                   scantime2[length(scantime2)])
      scantime1 <- scantime1[scantime1 <= rtmax]
      scantime2 <- scantime2[scantime2 <= rtmax]
    }
    valscantime1 <- length(scantime1)
    valscantime2 <- length(scantime2)
    
    ## Ensure we have the same number of scans.
    if (valscantime1 != valscantime2) {
      min_number <- min(valscantime1, valscantime2)
      diffs <- abs(range(scantime1) - range(scantime2))
      ## Cut at the start or at the end, depending on where we have the
      ## larger difference
      if (diffs[2] > diffs[1]) {
        scantime1 <- scantime1[seq_len(min_number)]
        scantime2 <- scantime2[seq_len(min_number)]
      } else {
        scantime1 <- rev(rev(scantime1)[seq_len(min_number)])
        scantime2 <- rev(rev(scantime2)[seq_len(min_number)])
      }
      valscantime1 <- length(scantime1)
      valscantime2 <- length(scantime2)
    }
    
    ## Finally, restrict the profile matrix to the restricted data
    if (ncol(cntrPr$profMat) != valscantime1) {
      ## Find out whether we were cutting at the start or end.
      start_idx <- which(scantime1[1] == rtime(cntr))
      end_idx <- which(scantime1[length(scantime1)] == rtime(cntr))
      cntrPr$profMat <- cntrPr$profMat[, start_idx:end_idx]
    }
    if(ncol(curP$profMat) != valscantime2) {
      start_idx <- which(scantime2[1] == rtime(z))
      end_idx <- which(scantime2[length(scantime2)] == rtime(z))
      curP$profMat <- curP$profMat[, start_idx:end_idx]
    }
    
    ## ---------------------------------
    ## 2) Now match the breaks/mz range.
    ##    The -1 below is because the breaks define the upper and lower
    ##    boundary. Have to do it that way to be in line with the orignal
    ##    code... would be better to use the breaks as is.
    
    mzr1 <- c(cntrPr$breaks[1], cntrPr$breaks[length(cntrPr$breaks) - 1])
    mzr2 <- c(curP$breaks[1], curP$breaks[length(curP$breaks) - 1])
    mzmin <- min(c(mzr1[1], mzr2[1]))
    mzmax <- max(c(mzr1[2], mzr2[2]))
    mzs <- seq(mzmin, mzmax, by = parms$binSize)
    
    ## Eventually add empty rows at the beginning
    if (mzmin < mzr1[1]) {
      tmp <- matrix(0, (length(seq(mzmin, mzr1[1], parms$binSize)) - 1),
                    ncol = ncol(cntrPr$profMat))
      cntrPr$profMat <- rbind(tmp, cntrPr$profMat)
    }
    
    ## Eventually add empty rows at the end
    if (mzmax > mzr1[2]) {
      tmp <- matrix(0, (length(seq(mzr1[2], mzmax, parms$binSize)) - 1),
                    ncol = ncol(cntrPr$profMat))
      cntrPr$profMat <- rbind(cntrPr$profMat, tmp)
    }
    
    ## Eventually add empty rows at the beginning
    if (mzmin < mzr2[1]) {
      tmp <- matrix(0, (length(seq(mzmin, mzr2[1], parms$binSize)) - 1),
                    ncol = ncol(curP$profMat))
      curP$profMat <- rbind(tmp, curP$profMat)
    }
    
    ## Eventually add empty rows at the end
    if (mzmax > mzr2[2]) {
      tmp <- matrix(0, (length(seq(mzr2[2], mzmax, parms$binSize)) - 1),
                    ncol = ncol(curP$profMat))
      curP$profMat <- rbind(curP$profMat, tmp)
    }
    
    ## A final check of the data.
    mzvals <- length(mzs)
    cntrVals <- length(cntrPr$profMat)
    curVals <- length(curP$profMat)
    
    if ((mzvals * valscantime1) != cntrVals | (mzvals * valscantime2) != curVals)
      stop("Dimensions of profile matrices of files ",
           basename(fileNames(cntr)), " and ", basename(fileNames(z)),
           " do not match!")

    if (.on.public.web() & !.optimize_switch){
      
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
      
    }
    

    ## Done with preparatory stuff - now I can perform the alignment.
    rtadj <- R_set_obiwarpR(valscantime1, scantime1, mzvals, mzs, cntrPr, valscantime2, scantime2, curP, parms);


    if (.on.public.web() & !.optimize_switch){
      
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
      
    }
 
    if (length(rtime(z)) != valscantime2) {
      nrt <- length(rtime(z))
      adj_starts_at <- which(rtime(z) == scantime2[1])
      adj_ends_at <- which(rtime(z) == scantime2[length(scantime2)])
      if (adj_ends_at < nrt)
        rtadj <- c(rtadj, rtadj[length(rtadj)] +
                     cumsum(diff(rtime(z)[adj_ends_at:nrt])))
      if (adj_starts_at > 1)
        rtadj <- c(rtadj[1] +
                     rev(cumsum(diff(rtime(z)[adj_starts_at:1]))), rtadj)
    }
    
    MessageOutput("OK", "\n", NULL)
    
    if (.on.public.web() & !.optimize_switch){

      MessageOutput(paste0("Aligning ", basename(fileNames(z)), " against ", basename(fileNames(cntr)), " ... OK\n"), 
                    "", 
                    NULL)

      #print_mes <- paste0("Aligning ", basename(fileNames(z)), " against ", basename(fileNames(cntr)), " ... OK\n");    
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);

    }
    
    return(unname(rtadj))
    
  }, cntr = centerObject, cntrPr = profCtr, parms = param)
  
  ## Create result
  adjRt <- vector("list", total_samples)
  adjRt[subs[centerSample]] <- list(unname(rtime(centerObject)))
  adjRt[subs[-centerSample]] <- res
  adjustRtimeSubset(rtraw, adjRt, subset = subs, method = "average")
  
}

#' @title PerformPeakFiling
#' @description PerformPeakFiling
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam().
#' @export
#' @return will return an mSet object with peak gaps filled
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' SetGlobalParallel(1);
#' mSet <- PerformPeakFiling(mSet);
#' register(bpstop());
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakFiling <- function(mSet, BPPARAM=bpparam()){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  param <- mSet@params;
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL);
      stop();
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !");
    }
  }
  
  if(length(mSet@peakRTcorrection) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
      stop();
    } else {
      stop("No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
    }
  }  
  ## Preparing Something
  if (!.optimize_switch) {
    MessageOutput(paste("Starting peak filling!"), ecol = "\n", 74)
  } 
  
  fixedRt <- fixedMz <- expandRt <- expandMz <- 0
  
  if (is.null(param$ppm)){
    warning("No ppm detected, will use ppm = 10 instead for peak filling !")
    ppm <-10
  } else {
    ppm <- param$ppm;
  }
  
  if (!.optimize_switch){
    MessageOutput(paste("Defining peak areas for filling-in."), ecol = "", NULL)
  } 
  
  aggFunLow <- median
  aggFunHigh <- median;

  ngroup <- length(mSet@peakgrouping);
  #tmp_pks <- mSet$msFeatureData$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
  tmp_pks <- mSet@peakRTcorrection$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
  ### complete peak table should be kept here
  fdef <- mSet@peakgrouping[[ngroup]];
  
  pkArea <- do.call(rbind,
                    lapply(fdef$peakidx, function(z) {
                      pa <- c(aggFunLow(tmp_pks[z, 1]),
                              aggFunHigh(tmp_pks[z, 2]),
                              aggFunLow(tmp_pks[z, 3]),
                              aggFunHigh(tmp_pks[z, 4]))
    ## Check if we have to apply ppm replacement:
    if (ppm != 0) {
      mzmean <- mean(pa[3:4])
      tittle <- mzmean * (ppm / 2) / 1E6
      if ((pa[4] - pa[3]) < (tittle * 2)) {
        pa[3] <- mzmean - tittle
        pa[4] <- mzmean + tittle
      }
    }
    ## Expand it.
    if (expandRt != 0) {
      diffRt <- (pa[2] - pa[1]) * expandRt / 2
      pa[1] <- pa[1] - diffRt
      pa[2] <- pa[2] + diffRt
    }
    if (expandMz != 0) {
      diffMz <- (pa[4] - pa[3]) * expandMz / 2
      pa[3] <- pa[3] - diffMz
      pa[4] <- pa[4] + diffMz
    }
    if (fixedMz != 0) {
      pa[3] <- pa[3] - fixedMz
      pa[4] <- pa[4] + fixedMz
    }
    if (fixedRt != 0) {
      pa[1] <- pa[1] - fixedRt
      pa[2] <- pa[2] + fixedRt
    }
    pa
  }))
  
  rm(tmp_pks)
  
  if (!.optimize_switch){
    MessageOutput(".", ecol = "", 75)
  }
  
  colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
  ## Add mzmed column - needed for MSW peak filling.
  pkArea <- cbind(group_idx = seq_len(nrow(pkArea)), pkArea,
                  mzmed = as.numeric(fdef$mzmed))
  
  if (length(mSet@rawOnDisk) != 0){
    
    if(is.null(param$BlankSub)){
      param$BlankSub <- FALSE;
    }
    if(param$BlankSub){
      specdata0 <- mSet@rawOnDisk;
      blk2rms <- which(specdata0@phenoData@data[["sample_group"]] == "BLANK");
      fdnew <- specdata0@featureData
      fdnew@data <- fdnew@data[!(fdnew@data[["fileIdx"]] %in% blk2rms),];
      #fdnew@data[["fileIdx"]] <- fdnew@data[["fileIdx"]] - length(blk2rms)
      ii <- 1;
      for(fii in unique(fdnew@data[["fileIdx"]])){
        fdnew@data[["fileIdx"]][fdnew@data[["fileIdx"]] == fii] <- ii;
        ii <- ii + 1
      }
      
      pdnew <- specdata0@phenoData;
      pdnew@data <- pdnew@data[-blk2rms,];
      numfiles <- length(specdata0@experimentData@instrumentModel) - length(blk2rms);
      
      specdata <- new(
        "OnDiskMSnExp",
        processingData = new("MSnProcess",
                             files = specdata0@processingData@files[-blk2rms]),
        featureData = fdnew,
        phenoData = pdnew,
        experimentData = new("MIAPE",
                             instrumentManufacturer = rep("a",numfiles),
                             instrumentModel = rep("a",numfiles),
                             ionSource = rep("a",numfiles),
                             analyser = rep("a",numfiles),
                             detectorType = rep("a",numfiles)))
    } else {
      specdata <- mSet@rawOnDisk;
    }
  } else {
    specdata <- mSet@rawInMemory;
  }
  
  fNames <- specdata@phenoData@data[["sample_name"]]
  #pks <- mSet$msFeatureData$chromPeaks
  pks <- mSet@peakRTcorrection[["chromPeaks"]]
  
  # if(!is.null(param$BlankSub)){
  #   if(param$BlankSub){
  #     fNames <- fNames[specdata@phenoData@data$sample_group != "BLANK"]
  #     #pks <- pks[pks[,"sample"] !=0,]
  #   }
  # }
  
  pkGrpVal <-
    .feature_values(pks = pks, fts = mSet@peakgrouping[[ngroup]],
                    method = "medret", value = "index",
                    intensity = "into", colnames = fNames,
                    missing = NA)
  
  if (!.optimize_switch){
    MessageOutput(".","",76)
  }
  
  ## Check if there is anything to fill...
  if (!any(is.na(rowSums(pkGrpVal)))) {
    if (!.optimize_switch){
      MessageOutput("\nNo missing peaks present.","\n",76)
    }
    # mSet@peakfilling <-""; # need to save something here
    mSet@peakfilling[["msFeatureData"]][["chromPeaks"]] <- 
      mSet@peakRTcorrection$chromPeaks;
    
    mSet@peakfilling[["msFeatureData"]][["adjustedRT"]] <- 
      mSet@peakRTcorrection$adjustedRT;
    
    mSet@peakfilling[["msFeatureData"]][["chromPeakData"]] <- 
      mSet@peakpicking[["chromPeakData"]];
    
    mSet@peakfilling[["FeatureGroupTable"]] <-
      mSet@peakgrouping[[length(mSet@peakgrouping)]];
    
    if(.on.public.web() & !.optimize_switch){
      save(mSet, file = "mSet.rda");
    }
    
    return(mSet)
  }
  
  if (!.optimize_switch){
    MessageOutput(".","",76)
  }
  
  ## Split the object by file and define the peaks for which
  pkAreaL <- objectL <- vector("list", length(fNames))
  
  ## We need "only" a list of OnDiskMSnExp, one for each file but
  ## instead of filtering by file we create small objects to keep
  ## memory requirement to a minimum.
  if (is(specdata,"OnDiskMSnExp")){
    
    req_fcol <- requiredFvarLabels("OnDiskMSnExp")
    min_fdata <- specdata@featureData@data[, req_fcol]
    
    ###
    res <- mSet@peakRTcorrection$adjustedRT
    res <- unlist(res, use.names = FALSE)
    
    fidx <- fData(specdata)$fileIdx # a issue for inmemory data
    names(fidx) <- featureNames(specdata)
    sNames <- unlist(split(featureNames(specdata), fidx),
                     use.names = FALSE)
    
    names(res) <- sNames
    res <- res[featureNames(specdata)]
    
    min_fdata$retentionTime <- res
    
    for (i in seq_along(specdata@phenoData@data[["sample_name"]])) {
      
      fd <- min_fdata[min_fdata$fileIdx == i, ]
      fd$fileIdx <- 1
      
      objectL[[i]] <- new(
        "OnDiskMSnExp",
        processingData = new("MSnProcess",
                             files = specdata@processingData@files[i]),
        featureData = new("AnnotatedDataFrame", fd),
        phenoData = new("NAnnotatedDataFrame",
                        data.frame(sampleNames = "1")),
        experimentData = new("MIAPE",
                             instrumentManufacturer = "a",
                             instrumentModel = "a",
                             ionSource = "a",
                             analyser = "a",
                             detectorType = "a"))
      
      ## Want to extract intensities only for peaks that were not
      ## found in a sample.
      
      pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE]
    }
    
    if (!.optimize_switch){
      MessageOutput(paste("OK\nStart integrating peak areas from original files..."), "\n", 77)
    } 
    
    cp_colnames <- colnames(mSet@peakRTcorrection[["chromPeaks"]])
    
    ## Extraction designed for centWave
    res <- bpmapply(FUN = .getChromPeakData, 
                    objectL,
                    pkAreaL, 
                    as.list(seq_along(objectL)),
                    MoreArgs = list(cn = cp_colnames,
                                    mzCenterFun = "weighted.mean"),
                    BPPARAM = BPPARAM, 
                    SIMPLIFY = FALSE)
    
    register(bpstop());
    
  } else {

    mzCenterFun = "weighted.mean";
    scan_set<-names(specdata@assayData);
    scan_set_ordered <- sort(scan_set);
    
    assayData <- sapply(scan_set_ordered, FUN = function(x){specdata@assayData[[x]]})
    
    #assayData <- lapply(scan_set,FUN=function(x){mSet[[format]]@assayData[[x]]});
    assayData <- split(assayData,fromFile(specdata));
    
    #rtim_all <- split(rtime(specdata),fromFile(specdata)) has to used the corrected RT
    rtim_all <- mSet@peakRTcorrection$adjustedRT;
    filesname <- basename(MSnbase::fileNames(specdata));
    res_new <-list();
    
    for (ii in seq_along(specdata@phenoData@data[["sample_name"]])) {
      
      cn <-colnames(mSet@peakRTcorrection$chromPeaks);
      peakArea <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
      
      ncols <- length(cn);
      res <- matrix(ncol = ncols, nrow = nrow(peakArea));
      colnames(res) <- cn;
      
      res[, "sample"] <- ii;
      res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")];
      ## Load the data

      if (!.optimize_switch){
        MessageOutput(mes = paste0("Requesting ", nrow(res), " peaks from ", filesname[ii], " ... "), "", NULL)          
      } 
      
      spctr <- assayData[[ii]];
      mzs <- lapply(spctr, mz);
      valsPerSpect <- lengths(mzs);
      ints <- unlist(lapply(spctr, intensity), use.names = FALSE);
      rm(spctr);
      
      mzs <- unlist(mzs, use.names = FALSE)
      mzs_range <- range(mzs)
      rtim <- rtim_all[[ii]]
      rtim_range <- range(rtim)
      
      for (i in seq_len(nrow(res))) {
        rtr <- peakArea[i, c("rtmin", "rtmax")]
        mzr <- peakArea[i, c("mzmin", "mzmax")]
        ## If the rt range is completely out; additional fix for #267
        if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
            mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
          res[i, ] <- rep(NA_real_, ncols)
          next
        }
        ## Ensure that the mz and rt region is within the range of the data.
        rtr[1] <- max(rtr[1], rtim_range[1])
        rtr[2] <- min(rtr[2], rtim_range[2])
        mzr[1] <- max(mzr[1], mzs_range[1])
        mzr[2] <- min(mzr[2], mzs_range[2])
        mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
                       valsPerSpect = valsPerSpect, rtrange = rtr,
                       mzrange = mzr)
        
        if (length(mtx)) {
          if (any(!is.na(mtx[, 3]))) {
            res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
              ((rtr[2] - rtr[1]) /
                 max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
            maxi <- which.max(mtx[, 3])
            res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
            res[i, c("rtmin", "rtmax")] <- rtr
            ## Calculate the intensity weighted mean mz
            meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
            if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
            res[i, "mz"] <- meanMz
          } else {
            res[i, ] <- rep(NA_real_, ncols)
          }
        } else {
          res[i, ] <- rep(NA_real_, ncols)
        }
      }
      
      if (!.optimize_switch) {
        MessageOutput(
          paste0("got ", sum(!is.na(res[, "into"])), "."),
          "\n",
          77 + ii / length(specdata@phenoData@data[["sample_name"]]) * 10
        )
      }
      res_new[[ii]] <- res;
      pkAreaL[[ii]] <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
    }
    
    res <-res_new;
  }
  
  res <- do.call(rbind, res)
  ## cbind the group_idx column to track the feature/peak group.
  res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"])
  ## Remove those without a signal
  res <- res[!is.na(res[, "into"]), , drop = FALSE]
  
  if (nrow(res) == 0) {
    if(!.optimize_switch){
      warning("Could not integrate any signal for the missing ",
              "peaks! Consider increasing 'expandMz' and 'expandRt'.");
    }
    
    if(.on.public.web() & !.optimize_switch){
      save(mSet, file = "mSet.rda");
    }
    
    return(mSet)
  }
  
  ## Get the msFeatureData:
  newFd <- mSet@peakRTcorrection
  
  incr <- nrow(mSet@peakRTcorrection[["chromPeaks"]])
  for (i in unique(res[, "group_idx"])) {
    fdef$peakidx[[i]] <- c(fdef$peakidx[[i]],
                           (which(res[, "group_idx"] == i) + incr))
  }
  ## Define IDs for the new peaks; include fix for issue #347
  maxId <- max(as.numeric(sub("^CP", "",
                              rownames(mSet@peakRTcorrection[["chromPeaks"]]))))
  if (maxId < 1)
    stop("chromPeaks matrix lacks rownames; please update ",
         "'object' with the 'updateObject' function.")
  toId <- maxId + nrow(res)
  
  rownames(res) <- sprintf(
    paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"),
    (maxId + 1L):toId)
  
  newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]],
                                 res[, -ncol(res)])
  
  cpd <- mSet@peakpicking[["chromPeakData"]][rep(1L, nrow(res)), , drop = FALSE]
  cpd[,] <- NA
  cpd$ms_level <- as.integer(1)
  cpd$is_filled <- TRUE
  if(length(cpd$Bank2rm) > 0){
    cpd$Bank2rm <- FALSE
  }
  
  if(param$BlankSub){
    incluVec <- mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]];
    newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]][!incluVec,], cpd);
    # newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]][!incluVec,],
    #                                res[, -ncol(res)]);
    # also need to correct group peak idx
    # rnms <- row.names(newFd[["chromPeaks"]]);
    # oldidx <- as.numeric(gsub("^CP", "",rnms));
    # newidx <- c(1:length(oldidx));
    # fdef@listData[["peakidx"]] <- lapply(fdef@listData[["peakidx"]], function(x){
    #   newidx[match(x, oldidx)]
    # })
  } else {
    newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]], cpd)
  }
  
  rownames(newFd$chromPeakData) <- rownames(newFd$chromPeaks);
  
  mSet@peakfilling$msFeatureData <- newFd;
  mSet@peakfilling$FeatureGroupTable <- fdef;

  if(.on.public.web() & !.optimize_switch){
    save(mSet, file = "mSet.rda");
  }
  
  register(bpstop());
  
  return(mSet)
}

#' @title PerformBlankSubstraction
#' @description PerformBlankSubstraction
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam().
#' @export
#' @noRd
#' @return will return an mSet object with peak gaps filled
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformBlankSubstraction <- function(mSet){
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if(!mSet@params$BlankSub){
    if(!.optimize_switch){
      MessageOutput("Blank substraction will not be performed!", ecol = "\n")
    }
    return(mSet)
  }
  
  if(!is(mSet@peakgrouping[[1]], "DFrame")){
    stop("mSet object doesnot include peak grouping info! Please 'PerformPeakGrouping' first!")
  }
  dm <- mSet@peakgrouping[[1]]@listData
  dm$peakidx <- NULL;
  if(is.null(dm$BLANK)){
    mSet@params[["BlankSub"]] <- FALSE;
    MessageOutput("No blank sample included based on grouping info. Skipped!", ecol = "\n")
    return(mSet)
  }
  chromPeaks <- as.data.frame(mSet@peakpicking[["chromPeaks"]]);
  peakidx <- mSet@peakgrouping[[1]]@listData[["peakidx"]];
  BlkPeak <- which(dm$BLANK != 0);
  BlkSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] == "BLANK");
  RegSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK");
  res <- sapply(BlkPeak, FUN = function(x) {
    idxs <- peakidx[[x]];
    feattb <- chromPeaks[idxs,]; # feature table - feattb
    blkinto <- mean(feattb$into[feattb$sample %in% BlkSample])
    smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)])
    if(is.nan(smlinto)){return(NA)}
    smlinto/blkinto
  })
  ## Estimating the threshold for filtering
  sort_res <- sort(res, decreasing = T)
  infectPoint <- bede(x= c(1:length(sort_res)), 
       y = sort_res, 1)$iplast
  if(!is.nan(infectPoint)){
    blk_thresh <- round(sort_res[ceiling(infectPoint)],1)/5
  } else {
    blk_thresh <- round(sort_res[length(sort_res)*0.2]) # Keep top 20% FC (sample/blank) peaks
  }
  if(!is.numeric(blk_thresh)){
    MessageOutput("No threshold can be evaluted for blank substraction, will use 10 !", ecol = "\n")
    blk_thresh <- 10;
  } else {
    MessageOutput(paste0("Threshold for blank filtration has been estimated as ", blk_thresh, "..."), ecol = "\n")
  }
  
  ## Generete Inclusion list --> T is keep, F is remove
  InclusionVec <- vapply(peakidx, function(x){
    feattb <- as.data.frame(chromPeaks[x,]); # feature table - feattb
    blkinto <- mean(feattb$into[feattb$sample %in% BlkSample]);
    smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)]);
    if(is.nan(smlinto)){
      return(FALSE)
    } else if (is.nan(blkinto)) {
      return(TRUE)
    } else if(smlinto/blkinto > blk_thresh){
      return(TRUE)
    } else {
      return(FALSE)
    }
  }, FUN.VALUE = vector(length = 1))
  
  ## Exclude blank features
  GroupingListData <- mSet@peakgrouping[[1]]@listData;
  GroupingListData_new <- lapply(GroupingListData, FUN = function(x){
    x[InclusionVec];
  })
  mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm <- 
    vector(mode = "logical", 
           length = nrow(mSet@peakpicking[["chromPeakData"]]));
  chrmpks2rm <- unlist(GroupingListData[["peakidx"]][!InclusionVec])
  mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm[chrmpks2rm] <- TRUE;
  
  leftBlks <- GroupingListData_new[["BLANK"]];
  GroupingListData_new[["BLANK"]] <- NULL;
  # Clean peakidx List
  blankChromPeakIdx <- which(chromPeaks$sample %in% BlkSample)
  peakidx_new <- lapply(GroupingListData_new[["peakidx"]], function(x){
    x[!(x %in% blankChromPeakIdx)]
  })
  GroupingListData_new[["peakidx"]] <- peakidx_new;
  
  mSet@peakgrouping[[1]]@listData <- GroupingListData_new;
  numFeat <- length(which(InclusionVec));
  mSet@peakgrouping[[1]]@nrows <- numFeat
  rownames(mSet@peakgrouping[[1]]) <- 
    sprintf(paste0("FT", "%0", ceiling(log10(numFeat + 1L)), "d"),
            seq(from = 1, length.out = numFeat))
  MessageOutput("Blank features have been substracted successfully!", ecol = "\n")

  return(mSet)
}


#' @title mSet2xcmsSet
#' @description mSet2xcmsSet
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787

mSet2xcmsSet <- function(mSet) {
  
  xs <- new("xcmsSet")
  #xs@peaks <- mSet[["msFeatureData"]][["chromPeaks"]]
  
  xs@peaks <- mSet@peakRTcorrection$chromPeaks;
  #fgs <- mSet$FeatureGroupTable
  fgs <- mSet@peakfilling$FeatureGroupTable
  xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)])
  rownames(xs@groups) <- NULL
  xs@groupidx <- fgs$peakidx
  
  rts <- list()
  
  if (is(mSet[[2]],"OnDiskMSnExp")){
    format <- "onDiskData"
  } else {
    format <- "inMemoryData"
  }
  
  ## Ensure we're getting the raw rt
  rts$raw <- rtime(mSet[[format]])
  rts$corrected <- mSet[["msFeatureData"]][["adjustedRT"]]
  
  xs@rt <- rts
  
  ## @phenoData
  xs@phenoData <- pData(mSet[[format]])
  ## @filepaths
  xs@filepaths <- fileNames(mSet[[format]])
  
  ## @profinfo (list)
  profMethod <- NA
  profStep <- NA
  profParam <- list()
  ## If we've got any MatchedFilterParam we can take the values from there
  
  ## @mslevel <- msLevel?
  xs@mslevel <- 1
  
  ## @scanrange
  xs@scanrange <- range(scanIndex(mSet[[format]]))
  
  ## @filled ... not yet.
  if (any(mSet$msFeatureData$chromPeakData$is_filled)) {
    fld <- which(mSet$msFeatureData$chromPeakData$is_filled)
    xs@filled <- as.integer(fld)
  }
  
  return(xs)
}

#' @title updateRawSpectraParam
#' @description updateRawSpectraParam
#' @param Params object generated by SetPeakParams function.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' @noRd
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' Mcgill University
#' License: GNU GPL (>= 2)

updateRawSpectraParam <- function (Params){
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  param <- list();
  
  # 0. Method Define
  param$Peak_method <- Params$Peak_method
  param$RT_method <- Params$RT_method
  
  # 1.1 update peak picking params - centWave
  if (param$Peak_method == "centWave"){
    
    param$ppm <- as.numeric(Params[["ppm"]]);
    param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
                         as.numeric(Params[["max_peakwidth"]]));
    if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
      save(Params, file="Params_error.rda");
      stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
    };
    
    param$snthresh <- as.numeric(Params[["snthresh"]]);
    param$prefilter <- c(as.numeric(Params[["prefilter"]]), 
                         as.numeric(Params[["value_of_prefilter"]]));
    param$roiList <- list();
    param$firstBaselineCheck <- TRUE;
    param$roiScales <- numeric(0);
    
    param$mzCenterFun <- Params[["mzCenterFun"]];
    param$integrate <- Params[["integrate"]];
    param$mzdiff <- as.numeric(Params[["mzdiff"]]);
    param$fitgauss <- as.logical(Params[["fitgauss"]]);
    param$noise <- as.numeric(Params[["noise"]]);
    param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
    
    param$binSize <-0.25; # density Param
    
  } else if (param$Peak_method == "matchedFilter") {
    
    param$fwhm <- as.numeric(Params$fwhm);
    param$sigma <- as.numeric(Params$sigma);
    param$steps <- as.numeric(Params$steps);
    param$snthresh <- as.numeric(Params$snthresh);
    param$peakBinSize <- as.numeric(Params$peakBinSize);
    
    param$mzdiff <- as.numeric(Params$mzdiff)
    param$bw <- as.numeric(Params$bw)
    param$max <- as.numeric(Params$max)
    
    param$impute <- "none";
    param$baseValue<- numeric(0);
    param$distance<- numeric(0);
    param$index<- FALSE;
    
    param$binSize <-0.25; # density Param
    
  } else if (param$Peak_method == "Massifquant") {
    
    param$ppm <- as.numeric(Params[["ppm"]]);
    param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
                         as.numeric(Params[["max_peakwidth"]]));
    if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
      stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
    };
    
    param$snthresh <- as.numeric(Params[["snthresh"]]);
    param$prefilter <- c(as.numeric(Params[["prefilter"]]), 
                         as.numeric(Params[["value_of_prefilter"]]));
    param$roiList <- list();
    param$firstBaselineCheck <- TRUE;
    param$roiScales <- numeric(0);
    
    param$mzCenterFun <- Params[["mzCenterFun"]];
    param$integrate <- Params[["integrate"]];
    param$mzdiff <- as.numeric(Params[["mzdiff"]]);
    param$fitgauss <- as.logical(Params[["fitgauss"]]);
    param$noise <- as.numeric(Params[["noise"]]);
    param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
    
    param$binSize <-0.25; # density Param
    
    # Specific Parameters
    param$criticalValue <- as.numeric(Params[["criticalValue"]]);
    param$consecMissedLimit <- as.numeric(Params[["consecMissedLimit"]]);
    param$unions <- as.numeric(Params[["unions"]]);
    param$checkBack <- as.numeric(Params[["checkBack"]]);
    param$withWave <- as.logical(Params[["withWave"]]);
    
  }
  # 2.1 update grouping params - density
  
  param$bw<-as.numeric(Params[["bw"]]);
  param$minFraction <- as.numeric(Params[["minFraction"]]);
  param$minSamples <- as.numeric(Params[["minSamples"]]);
  param$maxFeatures <- as.numeric(Params[["maxFeatures"]]);
  
  if (param[["minFraction"]][1] > 1.0) {
    stop("minFraction can not be larger than 1.0 ! Please adjust ...");
  };  
  
  # 3.1 update RT correction params - peakgroup
  
  param$extraPeaks <- as.numeric(Params[["extra"]]);
  
  param$smooth <- Params[["smooth"]];
  param$span <- as.numeric(Params[["span"]]);
  param$family <- Params[["family"]];
  
  param$subsetAdjust <- "average";
  
  # 4. Swith of Blank substraction
  param$BlankSub <- Params$BlankSub; # testing...
  
  # Finished !
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  if (.on.public.web() & !.optimize_switch){
    MessageOutput(paste("Parameters for",param$Peak_method, "have been successfully parsed!"), "\n", NULL);
  }
  
  return(param)
}

#' @title creatPeakTable
#' @description creatPeakTable
#' @param mSet mSet object, usually generated by 'PerformPeakAnnotation' here.
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

creatPeakTable <- function(mSet){
  
  if (length(mSet@rawInMemory@phenoData[["sample_name"]]) == 1) {
    return(mSet@peakfilling$msFeatureData$chromPeaks)
  }
  
  fgs <- mSet@peakfilling$FeatureGroupTable;
  groupmat <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]);
  rownames(groupmat) <- NULL;
  
  ts <- data.frame(cbind(groupmat,.groupval(mSet, value="into")), row.names = NULL)
  
  cnames <- colnames(ts)
  
  if (cnames[1] == 'mzmed') {
    cnames[1] <- 'mz'
  } else {
    stop ('mzmed column missing')
  }
  if (cnames[4] == 'rtmed') {
    cnames[4] <- 'rt'
  } else {
    stop ('mzmed column missing')
  }
  
  colnames(ts) <- cnames
  
  return(ts)
}

#' @title updateRawSpectraPath
#' @description updateRawSpectraPath
#' @param mSet mSet object generated by ImportRawMSData or the following functions.
#' @param newPath Character vector, a character vector specify the absolute path of the new raw spectra files.
#' @export
#' @return will return an mSet object with raw files' path updated
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada

updateRawSpectraPath <- function(mSet, newPath){

  oldPathLength <- length(mSet@rawOnDisk@processingData@files);
  newPathLength <- length(newPath);
  
  if(oldPathLength != newPathLength){
    stop("newPath define a wrong path character because the files number is incorrect !")
  }
  
  if(!all(sapply(newPath, file.exists))){
    stop("Not all new files exits, please check !")
  }
  
  mSet@rawOnDisk@processingData@files <- newPath;
  return(mSet);
  
}


########         -----------=========== Internal Functions From XCMS ===========-------------        #
# 
#' @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan,R., Siuzdak, G. (2006). 
#               "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear 
#                 peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.

adjustRtimeSubset <- function(rtraw, rtadj, subset,
                              method = c("average", "previous")) {
  method <- match.arg(method)
  if (length(rtraw) != length(rtadj))
    stop("Lengths of 'rtraw' and 'rtadj' have to match.")
  if (missing(subset))
    subset <- seq_along(rtraw)
  if (!all(subset %in% seq_along(rtraw)))
    stop("'subset' is out of bounds.")

  no_subset <- seq_len(length(rtraw))[-subset]
  for (i in no_subset) {
    message("Aligning sample number ", i, " against subset ... ",
            appendLF = TRUE)
    if (method == "previous") {
      i_adj <- .get_closest_index(i, subset, method = "previous")
      rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]],
                                       rtadj[[i_adj]])
    }
    if (method == "average") {
      i_ref <- c(.get_closest_index(i, subset, method = "previous"),
                 .get_closest_index(i, subset, method = "next"))
      trim_idx <- .match_trim_vector_index(rtraw[i_ref])
      rt_raw_ref <- do.call(
        cbind, .match_trim_vectors(rtraw[i_ref], idxs = trim_idx))
      rt_adj_ref <- do.call(
        cbind, .match_trim_vectors(rtadj[i_ref], idxs = trim_idx))
      wghts <- 1 / abs(i_ref - i) # weights depending on distance to i
      rt_raw_ref <- apply(rt_raw_ref, 1, weighted.mean, w = wghts)
      rt_adj_ref <- apply(rt_adj_ref, 1, weighted.mean, w = wghts)
      rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rt_raw_ref,
                                       rt_adj_ref)
    }
    message("OK")
  }
  rtadj
}

OptiChromPeaks <- function(pks, bySample = FALSE,
                           rt = numeric(), mz = numeric(),
                           ppm = 0, msLevel = integer(),
                           type = c("any", "within",
                                    "apex_within"),
                           isFilledColumn = FALSE) {
  type <- match.arg(type)
  # if (isFilledColumn)
  #   pks <- cbind(pks, is_filled = as.numeric(chromPeakData(object)$is_filled))
  # if (length(msLevel))
  #   pks <- pks[which(chromPeakData(object)$ms_level %in% msLevel), ,
  #              drop = FALSE]
  ## Select peaks within rt range.
  if (length(rt)) {
    rt <- range(rt)
    if (type == "any")
      keep <- which(pks[, "rtmin"] <= rt[2] & pks[, "rtmax"] >= rt[1])
    if (type == "within")
      keep <- which(pks[, "rtmin"] >= rt[1] & pks[, "rtmax"] <= rt[2])
    if (type == "apex_within")
      keep <- which(pks[, "rt"] >= rt[1] & pks[, "rt"] <= rt[2])
    pks <- pks[keep, , drop = FALSE]
  }
  ## Select peaks within mz range, considering also ppm
  if (length(mz) && length(pks)) {
    mz <- range(mz)
    ## Increase mz by ppm.
    if (is.finite(mz[1]))
      mz[1] <- mz[1] - mz[1] * ppm / 1e6
    if (is.finite(mz[2]))
      mz[2] <- mz[2] + mz[2] * ppm / 1e6
    if (type == "any")
      keep <- which(pks[, "mzmin"] <= mz[2] & pks[, "mzmax"] >= mz[1])
    if (type == "within")
      keep <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2])
    if (type == "apex_within")
      keep <- which(pks[, "mz"] >= mz[1] & pks[, "mz"] <= mz[2])
    pks <- pks[keep, , drop = FALSE]
  }
  
  pks
}
getLocalNoiseEstimate <- function(d, td, ftd, noiserange, Nscantime, threshold, num) {
  
  if (length(d) < Nscantime) {
    
    ## noiserange[2] is full d-range
    drange <- which(td %in% ftd)
    n1 <- d[-drange] ## region outside the detected ROI (wide)
    n1.cp <- continuousPtsAboveThresholdIdxR(n1, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n1 <- n1[!n1.cp]
    if (length(n1) > 1)  {
      baseline1 <- mean(n1)
      sdnoise1 <- sd(n1)
    } else
      baseline1 <- sdnoise1 <- 1
    
    ## noiserange[1]
    d1 <- drange[1]
    d2 <- drange[length(drange)]
    nrange2 <- c(max(1,d1 - noiserange[1]) : d1, d2 : min(length(d),d2 + noiserange[1]))
    n2 <- d[nrange2] ## region outside the detected ROI (narrow)
    n2.cp <- continuousPtsAboveThresholdIdxR(n2, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n2 <- n2[!n2.cp]
    if (length(n2) > 1)  {
      baseline2 <- mean(n2)
      sdnoise2 <- sd(n2)
    } else
      baseline2 <- sdnoise2 <- 1
    
  } else {
    trimmed <- trimm(d,c(0.05,0.95))
    baseline1 <- baseline2 <- mean(trimmed)
    sdnoise1 <- sdnoise2 <- sd(trimmed)
  }
  
  c(min(baseline1,baseline2),min(sdnoise1,sdnoise2))
}

#' @importFrom stats convolve
MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") { ## modified from package MassSpecWavelet
  if (wavelet == "mexh") {
    psi_xval <- seq(-6, 6, length = 256)
    psi <- (2/sqrt(3) * pi^(-0.25)) * (1 - psi_xval^2) *
      exp(-psi_xval^2/2)
  }
  else if (is.matrix(wavelet)) {
    if (nrow(wavelet) == 2) {
      psi_xval <- wavelet[1, ]
      psi <- wavelet[2, ]
    }
    else if (ncol(wavelet) == 2) {
      psi_xval <- wavelet[, 1]
      psi <- wavelet[, 2]
    }
    else {
      stop("Unsupported wavelet format!")
    }
  }
  else {
    stop("Unsupported wavelet!")
  }
  oldLen <- length(ms)
  ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2)
  len <- length(ms)
  nbscales <- length(scales)
  wCoefs <- NULL
  psi_xval <- psi_xval - psi_xval[1]
  dxval <- psi_xval[2]
  xmax <- psi_xval[length(psi_xval)]
  for (i in seq_along(scales)) {
    scale.i <- scales[i]
    f <- rep(0, len)
    j <- 1 + floor((0:(scale.i * xmax))/(scale.i * dxval))
    if (length(j) == 1)
      j <- c(1, 1)
    lenWave <- length(j)
    f[seq_len(lenWave)] <- rev(psi[j]) - mean(psi[j])
    if (length(f) > len)
    {i<-i-1;break;}   ##  stop(paste("scale", scale.i, "is too large!"))
    wCoefs.i <- 1/sqrt(scale.i) * convolve(ms, f)
    wCoefs.i <- c(wCoefs.i[(len - floor(lenWave/2) + 1):len],
                  wCoefs.i[seq_len(len - floor(lenWave/2))])
    wCoefs <- cbind(wCoefs, wCoefs.i)
  }
  if (i < 1) return(NA)
  scales <- scales[seq_len(i)]
  if (length(scales) == 1)
    wCoefs <- matrix(wCoefs, ncol = 1)
  colnames(wCoefs) <- scales
  wCoefs <- wCoefs[seq_len(oldLen), , drop = FALSE]
  wCoefs
}

#' @importFrom stats nextn
MSW.extendNBase <- function(x, nLevel=1, base=2, ...) { ## from package MassSpecWavelet
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  
  nR <- nrow(x)
  if (is.null(nLevel)) {
    nR1 <- nextn(nR, base)
  } else {
    nR1 <- ceiling(nR / base^nLevel) * base^nLevel
  }
  if (nR != nR1) {
    x <- MSW.extendLength(x, addLength=nR1-nR, ...)
  }
  x
}
MSW.extendLength <-   function(x, addLength=NULL, method=c('reflection', 'open', 'circular'), direction=c('right', 'left', 'both'))  {       ## from package MassSpecWavelet
  if (is.null(addLength)) stop('Please provide the length to be added!')
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  method <- match.arg(method)
  direction <- match.arg(direction)
  
  nR <- nrow(x)
  nR1 <- nR + addLength
  if (direction == 'both') {
    left <- right <- addLength
  } else if (direction == 'right') {
    left <- 0
    right <- addLength
  } else if (direction == 'left') {
    left <- addLength
    right <- 0
  }
  
  if (right > 0) {
    x <- switch(method,
                reflection =rbind(x, x[nR:(2 * nR - nR1 + 1), , drop=FALSE]),
                open = rbind(x, matrix(rep(x[nR,], addLength), ncol=ncol(x), byrow=TRUE)),
                circular = rbind(x, x[seq_len(nR1 - nR),, drop=FALSE]))
  }
  
  if (left > 0) {
    x <- switch(method,
                reflection =rbind(x[addLength:1, , drop=FALSE], x),
                open = rbind(matrix(rep(x[1,], addLength), ncol=ncol(x), byrow=TRUE), x),
                circular = rbind(x[(2 * nR - nR1 + 1):nR,, drop=FALSE], x))
  }
  if (ncol(x) == 1)  x <- as.vector(x)
  
  x
}
MSW.getLocalMaximumCWT <-  function(wCoefs, minWinSize=5, amp.Th=0)  {        ## from package MassSpecWavelet
  localMax <- NULL
  scales <- as.numeric(colnames(wCoefs))
  
  for (i in seq_along(scales)) {
    scale.i <- scales[i]
    winSize.i <- scale.i * 2 + 1
    if (winSize.i < minWinSize) {
      winSize.i <- minWinSize
    }
    temp <- MSW.localMaximum(wCoefs[,i], winSize.i)
    localMax <- cbind(localMax, temp)
  }
  ## Set the values less than peak threshold as 0
  localMax[wCoefs < amp.Th] <- 0
  colnames(localMax) <- colnames(wCoefs)
  rownames(localMax) <- rownames(wCoefs)
  localMax
}
MSW.localMaximum <-  function (x, winSize = 5)  {   ## from package MassSpecWavelet
  len <- length(x)
  rNum <- ceiling(len/winSize)
  
  ## Transform the vector as a matrix with column length equals winSize
  ## and find the maximum position at each row.
  y <- matrix(c(x, rep(x[len], rNum * winSize - len)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  
  ## keep the result
  localMax <- rep(0, len)
  localMax[(selInd-1) * winSize + y.maxInd[selInd]] <- 1
  
  ## Shift the vector with winSize/2 and do the same operation
  shift <- floor(winSize/2)
  rNum <- ceiling((len + shift)/winSize)
  y <- matrix(c(rep(x[1], shift), x, rep(x[len], rNum * winSize - len - shift)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  localMax[(selInd-1) * winSize + y.maxInd[selInd] - shift] <- 1
  
  ## Check whether there is some local maxima have in between distance less than winSize
  maxInd <- which(localMax > 0)
  selInd <- which(diff(maxInd) < winSize)
  if (length(selInd) > 0) {
    selMaxInd1 <- maxInd[selInd]
    selMaxInd2 <- maxInd[selInd + 1]
    temp <- x[selMaxInd1] - x[selMaxInd2]
    localMax[selMaxInd1[temp <= 0]] <- 0
    localMax[selMaxInd2[temp > 0]] <- 0
  }
  
  localMax
}
MSW.getRidge <-  function(localMax, iInit=ncol(localMax), step=-1, iFinal=1, minWinSize=3, gapTh=3, skip=NULL)  {  ## modified from package MassSpecWavelet
  
  scales <- as.numeric(colnames(localMax))
  if (is.null(scales))  scales <- seq_len(ncol(localMax))
  
  maxInd_curr <- which(localMax[, iInit] > 0)
  nMz <- nrow(localMax)
  
  if (is.null(skip)) {
    skip <- iInit + 1
  }
  
  ## Identify all the peak pathes from the coarse level to detail levels (high column to low column)
  ## Only consider the shortest path
  if ( ncol(localMax) > 1 ) colInd <- seq(iInit+step, iFinal, step)
  else colInd <- 1
  ridgeList <- as.list(maxInd_curr)
  names(ridgeList) <- maxInd_curr
  peakStatus <- as.list(rep(0, length(maxInd_curr)))
  names(peakStatus) <- maxInd_curr
  
  ## orphanRidgeList keep the ridges disconnected at certain scale level
  ## Changed by Pan Du 05/11/06
  orphanRidgeList <- NULL
  orphanRidgeName <- NULL
  nLevel <- length(colInd)
  
  for (j in seq_len(nLevel)) {
    col.j <- colInd[j]
    scale.j <- scales[col.j]
    
    if (colInd[j] == skip) {
      oldname <- names(ridgeList)
      ridgeList <- lapply(ridgeList, function(x) c(x, x[length(x)]))
      ##peakStatus <- lapply(peakStatus, function(x) c(x, x[length(x)]))
      names(ridgeList) <- oldname
      ##names(peakStatus) <- oldname
      next
    }
    
    if (length(maxInd_curr) == 0) {
      maxInd_curr <- which(localMax[, col.j] > 0)
      next
    }
    
    ## The slide window size is proportional to the CWT scale
    ## winSize.j <- scale.j / 2 + 1
    winSize.j <- floor(scale.j/2)
    if (winSize.j < minWinSize) {
      winSize.j <- minWinSize
    }
    
    selPeak.j <- NULL
    remove.j <- NULL
    for (k in seq_along(maxInd_curr)) {
      ind.k <- maxInd_curr[k]
      start.k <- ifelse(ind.k-winSize.j < 1, 1, ind.k-winSize.j)
      end.k <- ifelse(ind.k+winSize.j > nMz, nMz, ind.k+winSize.j)
      ind.curr <- which(localMax[start.k:end.k, col.j] > 0) + start.k - 1
      ##ind.curr <- which(localMax[, col.j] > 0)
      if (length(ind.curr) == 0) {
        status.k <- peakStatus[[as.character(ind.k)]]
        ## bug  work-around
        if (is.null(status.k)) status.k <- gapTh +1
        ##
        if (status.k > gapTh & scale.j >= 2) {
          temp <- ridgeList[[as.character(ind.k)]];
          
          if(length(temp)-status.k <= 0) next
          
          orphanRidgeList <- c(orphanRidgeList, list(temp[seq_len(length(temp)-status.k)]))
          orphanRidgeName <- c(orphanRidgeName, paste(col.j + status.k + 1, ind.k, sep='_'))
          remove.j <- c(remove.j, as.character(ind.k))
          next
        } else {
          ind.curr <- ind.k
          peakStatus[[as.character(ind.k)]] <- status.k + 1
        }
      } else {
        peakStatus[[as.character(ind.k)]] <- 0
        if (length(ind.curr) >= 2)  ind.curr <- ind.curr[which.min(abs(ind.curr - ind.k))]
      }
      ridgeList[[as.character(ind.k)]] <- c(ridgeList[[as.character(ind.k)]], ind.curr)
      selPeak.j <- c(selPeak.j, ind.curr)
    }
    ## Remove the disconnected lines from the currrent list
    if (length(remove.j) > 0) {
      removeInd <- which(names(ridgeList) %in% remove.j)
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Check for duplicated selected peaks and only keep the one with the longest path.
    dupPeak.j <- unique(selPeak.j[duplicated(selPeak.j)])
    if (length(dupPeak.j) > 0) {
      removeInd <- NULL
      for (dupPeak.jk in dupPeak.j) {
        selInd <- which(selPeak.j == dupPeak.jk)
        selLen <- sapply(ridgeList[selInd], length)
        removeInd.jk <- which.max(selLen)
        removeInd <- c(removeInd, selInd[-removeInd.jk])
        orphanRidgeList <- c(orphanRidgeList, ridgeList[removeInd.jk])
        orphanRidgeName <- c(orphanRidgeName, paste(col.j, selPeak.j[removeInd.jk], sep='_'))
      }
      selPeak.j <- selPeak.j[-removeInd]
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Update the names of the ridgeList as the new selected peaks
    ##if (scale.j >= 2) {
    if (length(ridgeList) > 0) names(ridgeList) <- selPeak.j
    if (length(peakStatus) > 0) names(peakStatus) <- selPeak.j
    ##}
    
    ## If the level is larger than 3, expand the peak list by including other unselected peaks at that level
    if (scale.j >= 2) {
      maxInd_next <- which(localMax[, col.j] > 0)
      unSelPeak.j <- maxInd_next[!(maxInd_next %in% selPeak.j)]
      newPeak.j <- as.list(unSelPeak.j)
      names(newPeak.j) <- unSelPeak.j
      ## Update ridgeList
      ridgeList <- c(ridgeList, newPeak.j)
      maxInd_curr <- c(selPeak.j, unSelPeak.j)
      ## Update peakStatus
      newPeakStatus <- as.list(rep(0, length(newPeak.j)))
      names(newPeakStatus) <- newPeak.j
      peakStatus <- c(peakStatus, newPeakStatus)
    } else {
      maxInd_curr <- selPeak.j
    }
  }
  
  ## Attach the peak level at the beginning of the ridge names
  if (length(ridgeList) > 0) names(ridgeList) <- paste(1, names(ridgeList), sep='_')
  if (length(orphanRidgeList) > 0) names(orphanRidgeList) <- orphanRidgeName
  ## Combine ridgeList and orphanRidgeList
  ridgeList <- c(ridgeList, orphanRidgeList)
  if (length(ridgeList) == 0) return(NULL)
  
  ## Reverse the order as from the low level to high level.
  ridgeList <- lapply(ridgeList, rev)
  ## order the ridgeList in increasing order
  ##ord <- order(selPeak.j)
  ##ridgeList <- ridgeList[ord]
  
  ## Remove possible duplicated ridges
  ridgeList <- ridgeList[!duplicated(names(ridgeList))]
  
  attr(ridgeList, 'class') <- 'ridgeList'
  attr(ridgeList, 'scales') <- scales
  return(ridgeList)
}
valueCount2ScanIndex <- function(valCount){
  ## Convert into 0 based.
  valCount <- cumsum(valCount)
  return(as.integer(c(0, valCount[-length(valCount)])))
}
descendMinTol <- function(d,startpos,maxDescOutlier) {
  l <- startpos[1]; r <- startpos[2]; outl <- 0; N <- length(d)
  ## left
  while ((l > 1) && (d[l] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- l
    if (d[l-1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- l
    l <- l -1
  }
  if (outl > 0) l <- l + outl
  ## right
  outl <- 0;
  while ((r < N) && (d[r] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- r
    if (d[r+1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- r
    r <- r + 1
  }
  if (outl > 0) r <- r - outl
  c(l,r)
}
joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range, peaks, maxGaussOverlap=0.5) {
  ## Fix issue #284: avoid having identical peaks multiple times in this
  ## matrix.
  peaks <- unique(peaks)
  gausspeaksidx <- which(!is.na(peaks[,"mu"]))
  Ngp <- length(gausspeaksidx)
  if (Ngp == 0)
    return(peaks)
  
  newpeaks <- NULL
  
  gpeaks <- peaks[gausspeaksidx, , drop = FALSE]
  if (nrow(peaks) - Ngp > 0)
    notgausspeaks <- peaks[-gausspeaksidx, , drop = FALSE]
  
  if (Ngp > 1) {
    comb <- which(upper.tri(matrix(0, Ngp, Ngp)), arr.ind = TRUE)
    overlap <- logical(nrow(comb))
    overlap <- rep(FALSE, dim(comb)[1])
    for (k in seq_len(nrow(comb))) {
      p1 <- comb[k, 1]
      p2 <- comb[k, 2]
      overlap[k] <- gaussCoverage(xlim = scan.range,
                                  h1 = gpeaks[p1, "h"],
                                  mu1 = gpeaks[p1, "mu"],
                                  s1 = gpeaks[p1, "sigma"],
                                  h2 = gpeaks[p2, "h"],
                                  mu2 = gpeaks[p2, "mu"],
                                  s2 = gpeaks[p2, "sigma"]) >=
        maxGaussOverlap
    }
  } else overlap <- FALSE
  
  if (any(overlap) && (Ngp > 1)) {
    jlist <- list()
    if (length(which(overlap)) > 1) {
      gm <- comb[overlap, ]
      ## create list of connected components
      cc <- list()
      cc[[1]] <- gm[1,] ## copy first entry
      for (j in 2:dim(gm)[1]) { ## search for connections
        ccl <- unlist(cc)
        nl <- sapply(cc, function(x) length(x))
        ccidx <- rep(seq_along(nl),nl)
        idx <- match(gm[j,],ccl)
        if (any(!is.na(idx))) { ## connection found, add to list
          pos <- ccidx[idx[which(!is.na(idx))[1]]]
          cc[[pos]] <- c(cc[[pos]],gm[j,])
        } else  ## create new list element
          cc[[length(cc) + 1]] <- gm[j,]
        
      }
      ccn <- list()
      lcc <- length(cc)
      ins <- rep(FALSE,lcc)
      if (lcc > 1) {
        jcomb <- which(upper.tri(matrix(0,lcc,lcc)),arr.ind = TRUE)
        for (j in seq_len(dim(jcomb)[1])) {
          j1 <- jcomb[j,1]; j2 <- jcomb[j,2]
          if (any(cc[[j1]] %in% cc[[j2]]))
            ccn[[length(ccn) +1]] <- unique(c(cc[[j1]],cc[[j2]]))
          else {
            if (!ins[j1]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j1]])
              ins[j1] <- TRUE
            }
            if (!ins[j2]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j2]])
              ins[j2] <- TRUE
            }
          }
        }
      } else ccn <- cc;
      
      size <- sapply(ccn, function(x) length(x))
      s2idx <- which(size >= 2)
      
      if (length(s2idx) > 0) {
        for (j in seq_along(s2idx)) {
          pgroup <- unique(ccn[[ s2idx[j] ]])
          jlist[[j]] <- pgroup
        }
      } else stop('(length(s2idx) = 0) ?!?')
    } else jlist[[1]] <- comb[overlap, ]
    
    ## join all peaks belonging to one cc
    for (j in seq_along(jlist)) {
      jidx <- jlist[[j]]
      newpeak <- gpeaks[jidx[1], , drop = FALSE]
      newmin <- min(gpeaks[jidx, "lmin"])
      newmax <- max(gpeaks[jidx, "lmax"])
      newpeak[1, "scpos"] <- -1 ## not defined after join
      newpeak[1, "scmin"] <- -1 ##    ..
      newpeak[1, "scmax"] <- -1 ##    ..
      newpeak[1, "scale"] <- -1 ##    ..
      
      newpeak[1, "maxo"] <- max(gpeaks[jidx, "maxo"])
      newpeak[1, "sn"]   <- max(gpeaks[jidx, "sn"])
      newpeak[1, "lmin"] <- newmin
      newpeak[1, "lmax"] <- newmax
      newpeak[1, "rtmin"] <- scantime[td[newmin]]
      newpeak[1, "rtmax"] <- scantime[td[newmax]]
      newpeak[1,"rt"] <- weighted.mean(gpeaks[jidx, "rt"],
                                       w = gpeaks[jidx, "maxo"])
      
      ## Re-assign m/z values
      p1 <- match(td[newmin], otd)[1]
      p2 <- match(td[newmax], otd)
      p2 <- p2[length(p2)]
      if (is.na(p1)) p1 <- 1
      if (is.na(p2)) p2 <- length(omz)
      mz.value <- omz[p1:p2]
      mz.int <- od[p1:p2]
      
      ## re-calculate m/z value for peak range
      #mzmean <- do.call(mzCenterFun, list(mz = mz.value,
      #                                    intensity = mz.int))
      mzmean <- weighted.mean(mz.value, mz.int)
      mzrange <- range(mz.value)
      newpeak[1, "mz"] <- mzmean
      newpeak[1, c("mzmin","mzmax")] <- mzrange
      
      ## re-fit gaussian
      md <- max(d[newmin:newmax])
      d1 <- d[newmin:newmax] / md
      pgauss <- fitGauss(td[newmin:newmax],
                         d[newmin:newmax],
                         pgauss = list(mu = td[newmin] +
                                         (td[newmax] - td[newmin])/2,
                                       sigma = td[newmax] - td[newmin],
                                       h = max(gpeaks[jidx, "h"])))
      if (!any(is.na(pgauss)) && all(pgauss > 0)) {
        newpeak[1, "mu"]    <- pgauss$mu
        newpeak[1, "sigma"] <- pgauss$sigma
        newpeak[1, "h"]     <- pgauss$h
        newpeak[1, "egauss"]<- sqrt((1/length(td[newmin:newmax])) *
                                      sum(((d1 - gauss(td[newmin:newmax],
                                                       pgauss$h/md,
                                                       pgauss$mu,
                                                       pgauss$sigma))^2)))
      } else { ## re-fit after join failed
        newpeak[1, "mu"]       <- NA
        newpeak[1, "sigma"]    <- NA
        newpeak[1, "h"]        <- NA
        newpeak[1, "egauss"]   <- NA
      }
      
      newpeaks <- rbind(newpeaks, newpeak)
    }
    ## add the remaining peaks
    jp <- unique(unlist(jlist))
    if (dim(peaks)[1] - length(jp) > 0)
      newpeaks <- rbind(newpeaks, gpeaks[-jp, ])
    
  } else
    newpeaks <- gpeaks
  
  grt.min <- newpeaks[, "rtmin"]
  grt.max <- newpeaks[, "rtmax"]
  
  if (nrow(peaks) - Ngp > 0) { ## notgausspeaks
    for (k in seq_len(nrow(notgausspeaks))) {
      ## here we can only check if they are completely overlapped
      ## by other peaks
      if (!any((notgausspeaks[k, "rtmin"] >= grt.min) &
               (notgausspeaks[k,"rtmax"] <= grt.max)))
        newpeaks <- rbind(newpeaks,notgausspeaks[k,])
    }
  }
  
  rownames(newpeaks) <- NULL
  newpeaks
}
trimm <- function(x, trim=c(0.05,0.95)) {
  a <- sort(x[x>0])
  Na <- length(a)
  quant <- round((Na*trim[1])+1):round(Na*trim[2])
  a[quant]
}
gaussCoverage <- function(xlim,h1,mu1,s1,h2,mu2,s2) {
  overlap <- NA
  by = 0.05
  ## Calculate points of intersection
  a <- s2^2 - s1^2
  cc <- -( 2 * s1^2 * s2^2 * (log(h1) - log(h2)) + (s1^2 * mu2^2) - (s2^2 * mu1^2) )
  b <- ((2 * s1^2 *mu2) - (2 * s2^2 * mu1))
  D <- b^2 - (a*cc)
  if (a==0) {
    S1 <- -cc/b
    S2 <- NA
  } else if ((D < 0) || ((b^2 - (4*a*cc)) < 0)) {
    S1 <- S2 <- NA
  } else {
    S1 <- (-b + sqrt(b^2 - (4*a*cc))) / (2*a)
    S2 <- (-b - sqrt(b^2 - (4*a*cc))) / (2*a)
    if (S2 < S1)
    {
      tmp <- S1
      S1 <- S2
      S2 <- tmp
    }
  }
  if (!is.na(S1)) if (S1 < xlim[1] || S1 > xlim[2]) S1 <- NA
  if (!is.na(S2)) if (S2 < xlim[1] || S2 > xlim[2]) S2 <- NA
  
  x <- seq(xlim[1],xlim[2],by=by)
  vsmall <- min(sum(gauss(x,h1,mu1,s1)), sum(gauss(x,h2,mu2,s2)))
  
  if (!is.na(S1) && !is.na(S2)) {
    x0 <- seq(xlim[1],S1,by=by)
    xo <- seq(S1,S2,by=by)
    x1 <- seq(S2,xlim[2],by=by)
    if (gauss(x0[cent(x0)],h1,mu1,s1) < gauss(x0[cent(x0)],h2,mu2,s2)) {
      ov1 <- sum(gauss(x0,h1,mu1,s1))
    } else {
      ov1 <- sum(gauss(x0,h2,mu2,s2))
    }
    if (gauss(xo[cent(xo)],h1,mu1,s1) < gauss(xo[cent(xo)],h2,mu2,s2)) {
      ov <- sum(gauss(xo,h1,mu1,s1))
    } else {
      ov <- sum(gauss(xo,h2,mu2,s2))
    }
    if (gauss(x1[cent(x1)],h1,mu1,s1) < gauss(x1[cent(x1)],h2,mu2,s2)) {
      ov2 <- sum(gauss(x1,h1,mu1,s1))
    } else {
      ov2 <- sum(gauss(x1,h2,mu2,s2))
    }
    overlap <- ov1 + ov + ov2
  } else
    if (is.na(S1) && is.na(S2)) { ## no overlap -> intergrate smaller function
      if (gauss(x[cent(x)],h1,mu1,s1) < gauss(x[cent(x)],h2,mu2,s2)) {
        overlap <- sum(gauss(x,h1,mu1,s1))
      } else {
        overlap <- sum(gauss(x,h2,mu2,s2))
      }
    } else
      if (!is.na(S1) || !is.na(S2)) {
        if (is.na(S1)) S0 <- S2 else S0 <- S1
        x0 <- seq(xlim[1],S0,by=by)
        x1 <- seq(S0,xlim[2],by=by)
        g01 <- gauss(x0[cent(x0)],h1,mu1,s1)
        g02 <- gauss(x0[cent(x0)],h2,mu2,s2)
        g11 <- gauss(x1[cent(x1)],h1,mu1,s1)
        g12 <- gauss(x1[cent(x1)],h2,mu2,s2)
        if (g01 < g02) ov1 <- sum(gauss(x0,h1,mu1,s1)) else ov1 <- sum(gauss(x0,h2,mu2,s2))
        if (g11 < g12) ov2 <- sum(gauss(x1,h1,mu1,s1)) else ov2 <- sum(gauss(x1,h2,mu2,s2))
        if ((g01 == g02) && (g01==0)) ov1 <- 0
        if ((g11 == g12) && (g11==0)) ov2 <- 0
        overlap <- ov1 + ov2
      }
  
  overlap / vsmall
}
cent <- function(x) {
  N <- length(x)
  if (N == 1) return(1)
  floor(N/2)
}
gauss <- function(x, h, mu, sigma){
  h*exp(-(x-mu)^2/(2*sigma^2))
}
fitGauss <- function(td, d, pgauss = NA) {
  if (length(d) < 3) return(rep(NA,3))
  if (!any(is.na(pgauss))) { mu <- pgauss$mu; sigma <- pgauss$sigma;h <- pgauss$h }
  fit <- try(nls(d ~ GaussModel(td,mu,sigma,h)), silent = TRUE)
  if (is(fit, "try-error"))
    fit <- try(nls(d ~ GaussModel(td, mu, sigma, h), algorithm = 'port'),
               silent = TRUE)
  if (is(fit, "try-error"))  return(rep(NA, 3))
  
  as.data.frame(t(fit$m$getPars()))
}

#' nls.start
#'
#' @param formula NA
#' @param data NA
#' @param start NA
#' @param algorithm NA
#' @param subset NA
#' @param weights NA
#' @param na.action NA
#' @param model NA
#' @param lower NA
#' @param upper NA
#' @param ... NA
#' @return NA
#' @noRd
#' @importFrom stats setNames
#' @importFrom stats model.weights
#' @importFrom stats getInitial
nls.start <- function (formula, data = parent.frame(), start,
                       algorithm = c("default", "plinear", "port"),
                       subset, weights, na.action, model = FALSE, lower = -Inf, 
                       upper = Inf, ...)
{

  formula <- as.formula(formula)
  algorithm <- match.arg(algorithm)
  if (!is.list(data) && !is.environment(data)) 
    stop("'data' must be a list or an environment")
  mf <- cl <- match.call()
  varNames <- all.vars(formula)
  if (length(formula) == 2L) {
    formula[[3L]] <- formula[[2L]]
    formula[[2L]] <- 0
  }
  form2 <- formula
  form2[[2L]] <- 0
  varNamesRHS <- all.vars(form2)
  mWeights <- missing(weights)
  pnames <- if (missing(start)) {
    if (!is.null(attr(data, "parameters"))) {
      names(attr(data, "parameters"))
    }
    else {
      cll <- formula[[length(formula)]]
      if (is.symbol(cll)) {
        cll <- substitute(S + 0, list(S = cll))
      }
      fn <- as.character(cll[[1L]])
      if (is.null(func <- tryCatch(get(fn), error = function(e) NULL))) 
        func <- get(fn, envir = parent.frame())
      if (!is.null(pn <- attr(func, "pnames"))) 
        as.character(as.list(match.call(func, call = cll))[-1L][pn])
    }
  }
  else names(start)
  env <- environment(formula)
  if (is.null(env)) 
    env <- parent.frame()
  if (length(pnames)) 
    varNames <- varNames[is.na(match(varNames, pnames))]
  lenVar <- function(var) tryCatch(length(eval(as.name(var), 
                                               data, env)), error = function(e) -1L)
  if (length(varNames)) {
    n <- vapply(varNames, lenVar, 0)
    if (any(not.there <- n == -1L)) {
      nnn <- names(n[not.there])
      if (missing(start)) {
        if (algorithm == "plinear") 
          stop("no starting values specified")
        warning("No starting values specified for some parameters.\n", 
                "Initializing ", paste(sQuote(nnn), collapse = ", "), 
                " to '1.'.\n", "Consider specifying 'start' or using a selfStart model", 
                domain = NA)
        setNames <- stats::setNames;
        start <- setNames(as.list(rep_len(1, length(nnn))), 
                          nnn)
        varNames <- varNames[i <- is.na(match(varNames, 
                                              nnn))]
        n <- n[i]
      }
      else stop(gettextf("parameters without starting value in 'data': %s", 
                         paste(nnn, collapse = ", ")), domain = NA)
    }
  }
  else {
    if (length(pnames) && any((np <- sapply(pnames, lenVar)) == 
                              -1)) {
      message(sprintf(ngettext(sum(np == -1), "fitting parameter %s without any variables", 
                               "fitting parameters %s without any variables"), 
                      paste(sQuote(pnames[np == -1]), collapse = ", ")), 
              domain = NA)
      n <- integer()
    }
    else stop("no parameters to fit")
  }
  respLength <- length(eval(formula[[2L]], data, env))
  if (length(n) > 0L) {
    varIndex <- n%%respLength == 0
    if (is.list(data) && diff(range(n[names(n) %in% names(data)])) > 
        0) {
      mf <- data
      if (!missing(subset)) 
        warning("argument 'subset' will be ignored")
      if (!missing(na.action)) 
        warning("argument 'na.action' will be ignored")
      if (missing(start)) 
        start <- getInitial(formula, mf)
      startEnv <- new.env(hash = FALSE, parent = environment(formula))
      for (i in names(start)) assign(i, start[[i]], envir = startEnv)
      rhs <- eval(formula[[3L]], data, startEnv)
      n <- NROW(rhs)
      wts <- if (mWeights) 
        rep_len(1, n)
      else eval(substitute(weights), data, environment(formula))
    }
    else {
      vNms <- varNames[varIndex];
      model.weights <- stats::model.weights
      if (any(nEQ <- vNms != make.names(vNms))) 
        vNms[nEQ] <- paste0("`", vNms[nEQ], "`")
      mf$formula <- as.formula(paste("~", paste(vNms, 
                                                collapse = "+")), env = environment(formula))
      mf$start <- mf$control <- mf$algorithm <- mf$trace <- mf$model <- NULL
      mf$lower <- mf$upper <- NULL
      mf[[1L]] <- quote(stats::model.frame)
      mf <- eval.parent(mf)
      n <- nrow(mf)
      mf <- as.list(mf)
      wts <- if (!mWeights) 
        model.weights(mf)
      else rep_len(1, n)
    }
    if (any(wts < 0 | is.na(wts))) 
      stop("missing or negative weights not allowed")
  }
  else {
    varIndex <- logical()
    mf <- list(0)
    wts <- numeric()
  }
  getInitial <- stats::getInitial;
  start <- getInitial(formula, mf)
  
  return(start)
  
}


running <- function (X, Y = NULL, fun = mean, width = min(length(X), 20),
                     allow.fewer = FALSE, pad = FALSE, align = c("right", "center",
                                                                 "left"), simplify = TRUE, by, ...) {   ## from package gtools
  align = match.arg(align)
  n <- length(X)
  if (align == "left") {
    from <- seq_len(n)
    to <- pmin(seq_len(n) + width - 1, n)
  }
  else if (align == "right") {
    from <- pmax(seq_len(n) - width + 1, 1)
    to <- seq_len(n)
  }
  else {
    from <- pmax((2 - width):n, 1)
    to <- pmin(seq_len(n + width - 1), n)
    if (!odd(width))
      stop("width must be odd for center alignment")
  }
  elements <- apply(cbind(from, to), 1, function(x) seq(x[1],
                                                        x[2]))
  if (is.matrix(elements))
    elements <- as.data.frame(elements)
  names(elements) <- paste(from, to, sep = ":")
  if (!allow.fewer) {
    len <- sapply(elements, length)
    skip <- (len < width)
  }
  else {
    skip <- 0
  }
  run.elements <- elements[!skip]
  if (!invalid(by))
    run.elements <- run.elements[seq(from = 1, to = length(run.elements),
                                     by = by)]
  if (is.null(Y)) {
    funct1 <- function(which, what, fun, ...) fun(what[which],
                                                 ...)
    if (simplify)
      Xvar <- sapply(run.elements, funct1, what = X, fun = fun,
                     ...)
    else Xvar <- lapply(run.elements, funct1, what = X, fun = fun,
                        ...)
  } else {
    funct2 <- function(which, XX, YY, fun, ...) fun(XX[which],
                                                   YY[which], ...)
    if (simplify)
      Xvar <- sapply(run.elements, funct2, XX = X, YY = Y,
                     fun = fun, ...)
    else Xvar <- lapply(run.elements, funct2, XX = X, YY = Y,
                        fun = fun, ...)
  }
  if (allow.fewer || !pad)
    return(Xvar)
  if (simplify)
    if (is.matrix(Xvar)) {
      wholemat <- matrix(new(class(Xvar[1, 1]), NA), ncol = length(to),
                         nrow = nrow(Xvar))
      colnames(wholemat) <- paste(from, to, sep = ":")
      wholemat[, -skip] <- Xvar
      Xvar <- wholemat
    }
  else {
    wholelist <- rep(new(class(Xvar[1]), NA), length(from))
    names(wholelist) <- names(elements)
    wholelist[names(Xvar)] <- Xvar
    Xvar <- wholelist
  }
  return(Xvar)
}
invalid <- function (x) {   ## from package gtools
  if (missing(x) || is.null(x) || length(x) == 0)
    return(TRUE)
  if (is.list(x))
    return(all(sapply(x, invalid)))
  else if (is.vector(x))
    return(all(is.na(x)))
  else return(FALSE)
}
odd <- function (x) x != as.integer(x/2) * 2;
na.flatfill <- function(x) {
  
  realloc <- which(!is.na(x))
  if (realloc[1] > 1)
    x[seq_len(realloc[1]-1)] <- x[realloc[1]]
  if (realloc[length(realloc)] < length(x))
    x[(realloc[length(realloc)]+1):length(x)] <- x[realloc[length(realloc)]]
  x
}

#' @title GaussModel
#' @description GaussModel
#' @param x a numeric vector of values at which to evaluate the model
#' @param mu mean  of the distribution function
#' @param sigma standard deviation of the distribution fuction
#' @param h height of the distribution function
#' @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan,R., Siuzdak, G. (2006). 
#' "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear 
#' peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.
#' @return return result of selfstart
#' @export
#' @examples 
#' ints<- c(c(1:5,5:1))
#' ##nls(y ~ GaussModel(x, mu, sigma, h), 
#' ##                   data.frame(x = 1:length(ints), y = ints))

GaussModel <- selfStart(~ h*exp(-(x-mu)^2/(2*sigma^2)), function(mCall, data, LHS) {
  
  xy <- sortedXyData(mCall[["x"]], LHS, data);
  
  len <- dim(xy)[1];
  xyarea <- sum((xy[2:len,2]+xy[seq_len(len-1),2])*(xy[2:len,1]-xy[seq_len(len-1),1]))/2;
  maxpos <- which.max(xy[,2]);
  
  mu <- xy[maxpos,1];
  h <- xy[maxpos,2];
  sigma <- xyarea/(h*sqrt(2*pi));
  
  value <- c(mu, sigma, h);
  names(value) <- mCall[c("mu", "sigma", "h")];
  value;
  
}, c("mu", "sigma", "h"))

#' @importFrom utils head
#' @importFrom utils tail
.match_trim_vector_index <- function(x, n = 5) {
  lens <- lengths(x)
  min_len <- min(lens)
  if (length(unique(lens)) == 1)
    replicate(n = length(lens), seq_len(min_len))
  hd <- vapply(x, function(z) mean(head(z, n = n)), numeric(1))
  tl <- vapply(x, function(z) mean(tail(z, n = n)), numeric(1))
  if (diff(range(hd)) <= diff(range(tl)))
    replicate(n = length(x), seq_len(min_len), FALSE)
  else
    lapply(lens, function(z) (z - min_len + 1):z)
}
.match_trim_vectors <- function(x, n = 5, idxs) {
  if (missing(idxs))
    idxs <- .match_trim_vector_index(x, n = n)
  mapply(x, idxs, FUN = function(z, idx) z[idx], SIMPLIFY = FALSE)
}
.narrow_rt_boundaries <- function(lm, d, thresh = 1) {
  lm_seq <- lm[1]:lm[2]
  above_thresh <- d[lm_seq] >= thresh
  if (any(above_thresh)) {
    ## Expand by one on each side to be consistent with old code.
    above_thresh <- above_thresh | c(above_thresh[-1], FALSE) |
      c(FALSE, above_thresh[-length(above_thresh)])
    lm <- range(lm_seq[above_thresh], na.rm = TRUE)
  }
  lm
}
.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), rtrange = numeric(), scanrange = numeric(), log = FALSE) {
  if (length(rtrange) >= 2) {
    rtrange <- range(rtrange)
    ## Fix for issue #267. rtrange outside scanrange causes scanrange
    ## being c(Inf, -Inf)
    scns <- which((scantime >= rtrange[1]) & (scantime <= rtrange[2]))
    if (!length(scns))
      return(matrix(
        nrow = 0, ncol = 3,
        dimnames = list(character(), c("time", "mz", "intensity"))))
    scanrange <- range(scns)
  }
  if (length(scanrange) < 2)
    scanrange <- c(1, length(valsPerSpect))
  else scanrange <- range(scanrange)
  if (!all(is.finite(scanrange)))
    stop("'scanrange' does not contain finite values")
  if (!all(is.finite(mzrange)))
    stop("'mzrange' does not contain finite values")
  if (!all(is.finite(rtrange)))
    stop("'rtrange' does not contain finite values")
  if (scanrange[1] == 1)
    startidx <- 1
  else
    startidx <- sum(valsPerSpect[seq_len(scanrange[1] - 1)]) + 1
  endidx <- sum(valsPerSpect[seq_len(scanrange[2])])
  scans <- rep(scanrange[1]:scanrange[2],
               valsPerSpect[scanrange[1]:scanrange[2]])
  masses <- mz[startidx:endidx]
  massidx <- seq_along(masses)
  if (length(mzrange) >= 2) {
    mzrange <- range(mzrange)
    massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))]
  }
  int <- int[startidx:endidx][massidx]
  if (log && (length(int) > 0))
    int <- log(int + max(1 - min(int), 0))
  cbind(time = scantime[scans[massidx]],
        mz = masses[massidx],
        intensity = int)
}
.getPeakGroupsRtMatrix <- function(peaks, peakIndex, sampleIndex, missingSample, extraPeaks) {
  ## For each feature:
  ## o extract the retention time of the peak with the highest intensity.
  ## o skip peak groups if they are not assigned a peak in at least a
  ##   minimum number of samples OR if have too many peaks from the same
  ##   sample assigned to it.
  
  nSamples <- length(sampleIndex)
  rt <- lapply(peakIndex, function(z) {
    cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE]
    
    ## Return NULL if we've got less samples that required or is the total
    ## number of peaks is larger than a certain threshold.
    ## Note that the original implementation is not completely correct!
    ## nsamp > nsamp + extraPeaks might be correct.
    
    nsamp <- length(unique(cur_fts[, "sample"]))
    if (nsamp < (nSamples - missingSample) |
        nrow(cur_fts) > (nsamp + extraPeaks))
      return(NULL)
    cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), ]
    cur_fts[match(sampleIndex, cur_fts[, 3]), 1]
  })
  rt <- do.call(rbind, rt)
  
  ## Order them by median retention time. NOTE: this is different from the
  ## original code, in which the peak groups are ordered by the median
  ## retention time that is calculated over ALL peaks within the peak
  ## group, not only to one peak selected for each sample (for multi
  ## peak per sample assignments).
  ## Fix for issue #175
  if (is(rt, "matrix")) {
    rt <- rt[order(Biobase::rowMedians(rt, na.rm = TRUE)), , drop = FALSE]
  }
  rt
}
.peakIndex <- function(object) {
  if (inherits(object, "DataFrame")) {
    idxs <- object$peakidx
    names(idxs) <- rownames(object)
  } else {
    if (is.null(object[["FeatureGroupTable"]]))
      stop("No feature definitions present. Please run groupChromPeaks first.")
    idxs <- object[["FeatureGroupTable"]]@listData[["peakidx"]]
    names(idxs) <- rownames(object[["FeatureGroupTable"]])
  }
  idxs
}
.applyRtAdjToChromPeaks <- function(x, rtraw, rtadj) {
  ## Using a for loop here.
  for (i in seq_along(rtraw)) {
    whichSample <- which(x[, "sample"] == i)
    if (length(whichSample)) {
      x[whichSample, c("rt", "rtmin", "rtmax")] <-
        .applyRtAdjustment(x[whichSample, c("rt", "rtmin", "rtmax")],
                           rtraw = rtraw[[i]], rtadj = rtadj[[i]])
    }
  }
  x
}

#' @importFrom stats stepfun
.applyRtAdjustment <- function(x, rtraw, rtadj) {
  ## re-order everything if rtraw is not sorted; issue #146
  if (is.unsorted(rtraw)) {
    idx <- order(rtraw)
    rtraw <- rtraw[idx]
    rtadj <- rtadj[idx]
  }
  adjFun <- stats::stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
  res <- adjFun(x)
  ## Fix margins.
  idx_low <- which(x < rtraw[1])
  if (length(idx_low)) {
    first_adj <- idx_low[length(idx_low)] + 1
    res[idx_low] <- x[idx_low] + res[first_adj] - x[first_adj]
  }
  idx_high <- which(x > rtraw[length(rtraw)])
  if (length(idx_high)) {
    last_adj <- idx_high[1] - 1
    res[idx_high] <- x[idx_high] + res[last_adj] - x[last_adj]
  }
  if (is.null(dim(res)))
    names(res) <- names(x)
  res
}
.getChromPeakData <- function(object, peakArea, sample_idx,mzCenterFun = "weighted.mean",cn = c("mz", "rt", "into", "maxo", "sample")) {
  
  ncols <- length(cn)
  res <- matrix(ncol = ncols, nrow = nrow(peakArea))
  colnames(res) <- cn
  res[, "sample"] <- sample_idx
  res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")]
  ## Load the data
  if(!.on.public.web()){
    MessageOutput(paste0("Requesting ", nrow(res), " peaks from ",basename(MSnbase::fileNames(object)), " ... "), "",NULL)
  }
  object <- filterRt(
    object, rt = range(peakArea[, c("rtmin", "rtmax")]) + c(-2, 2))
  
  spctr <- MSnbase::spectra(object, BPPARAM = SerialParam())

  mzs <- lapply(spctr, mz)
  valsPerSpect <- lengths(mzs)
  ints <- unlist(lapply(spctr, intensity), use.names = FALSE)
  rm(spctr)
  
  mzs <- unlist(mzs, use.names = FALSE)
  mzs_range <- range(mzs)
  rtim <- rtime(object)
  rtim_range <- range(rtim)
  
  for (i in seq_len(nrow(res))) {
    rtr <- peakArea[i, c("rtmin", "rtmax")]
    mzr <- peakArea[i, c("mzmin", "mzmax")]
    ## If the rt range is completely out; additional fix for #267
    if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
        mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
      res[i, ] <- rep(NA_real_, ncols)
      next
    }
    ## Ensure that the mz and rt region is within the range of the data.
    rtr[1] <- max(rtr[1], rtim_range[1])
    rtr[2] <- min(rtr[2], rtim_range[2])
    mzr[1] <- max(mzr[1], mzs_range[1])
    mzr[2] <- min(mzr[2], mzs_range[2])
    mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
                   valsPerSpect = valsPerSpect, rtrange = rtr,
                   mzrange = mzr)
    if (length(mtx)) {
      if (any(!is.na(mtx[, 3]))) {
        ## How to calculate the area: (1)sum of all intensities / (2)by
        ## the number of data points (REAL ones, considering also NAs)
        ## and multiplied with the (3)rt width.
        ## (1) sum(mtx[, 3], na.rm = TRUE)
        ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used
        ## nrow(mtx) here, which would correspond to the non-NA
        ## intensities within the rt range we don't get the same results
        ## as e.g. centWave. Using max(1, ... to avoid getting Inf in
        ## case the signal is based on a single data point.
        ## (3) rtr[2] - rtr[1]
        res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
          ((rtr[2] - rtr[1]) /
             max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
        maxi <- which.max(mtx[, 3])
        res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
        res[i, c("rtmin", "rtmax")] <- rtr
        ## Calculate the intensity weighted mean mz
        meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
        if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
        res[i, "mz"] <- meanMz
      } else {
        res[i, ] <- rep(NA_real_, ncols)
      }
    } else {
      res[i, ] <- rep(NA_real_, ncols)
    }
  }
  
  if(!.on.public.web()){
    MessageOutput(paste("got ", sum(!is.na(res[, "into"])), "."), "\n", NULL)  
  }
  
  if(.on.public.web()){
    MessageOutput(paste0("Requesting ", nrow(res), " peaks from ",
                         basename(MSnbase::fileNames(object)), " ... ", "got ", 
                         sum(!is.na(res[, "into"])), ".\n"), "",NULL)
  }
  
  return(res)
}

.feature_values <- function(pks, fts, method, value = "into",intensity = "into", colnames, missing = NA) {
  ftIdx <- fts$peakidx
  ## Match columns
  idx_rt <- match("rt", colnames(pks))
  idx_int <- match(intensity, colnames(pks))
  idx_samp <- match("sample", colnames(pks))
  vals <- matrix(nrow = length(ftIdx), ncol = length(colnames))
  nSamples <- seq_along(colnames)
  if (method == "sum") {
    for (i in seq_along(ftIdx)) {
      cur_pks <- pks[ftIdx[[i]], c(value, "sample")]
      int_sum <- split(cur_pks[, value], cur_pks[, "sample"])
      vals[i, as.numeric(names(int_sum))] <-
        unlist(lapply(int_sum, base::sum), use.names = FALSE)
    }
  } else {
    if (method == "medret") {
      medret <- fts$rtmed
      for (i in seq_along(ftIdx)) {
        gidx <- ftIdx[[i]][
          base::order(base::abs(pks[ftIdx[[i]],
                                    idx_rt] - medret[i]))]
        vals[i, ] <- gidx[
          base::match(nSamples, pks[gidx, idx_samp])]
      }
    }
    if (method == "maxint") {
      for (i in seq_along(ftIdx)) {
        gidx <- ftIdx[[i]][
          base::order(pks[ftIdx[[i]], idx_int],
                      decreasing = TRUE)]
        vals[i, ] <- gidx[base::match(nSamples,
                                      pks[gidx, idx_samp])]
      }
    }
    if (value != "index") {
      if (!any(colnames(pks) == value))
        stop("Column '", value, "' not present in the ",
             "chromatographic peaks matrix!")
      vals <- pks[vals, value]
      dim(vals) <- c(length(ftIdx), length(nSamples))
    }
  }
  if (value != "index") {
    if (is.numeric(missing)) {
      vals[is.na(vals)] <- missing
    }
    if (!is.na(missing) & missing == "rowmin_half") {
      for (i in seq_len(nrow(vals))) {
        nas <- is.na(vals[i, ])
        if (any(nas))
          vals[i, nas] <- min(vals[i, ], na.rm = TRUE) / 2
      }
    }
  }
  colnames(vals) <- colnames
  rownames(vals) <- rownames(fts)
  vals
}

#' @importFrom stats median
.groupval <-function(mSet, method = c("medret", "maxint"), value = "into", intensity = "into"){
  
  fgs <- mSet@peakfilling$FeatureGroupTable;
  groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]);
  rownames(groups) <- NULL;
  groupidx <- fgs$peakidx;
  peaks <- mSet@peakfilling$msFeatureData$chromPeaks;
  
  if (nrow(groups)<1 || length(groupidx) <1) {
    stop("xcmsSet is not been grouped.")
  }
  
  method <- match.arg(method)
  
  peakmat <- peaks
  groupmat <- groups
  groupindex <- groupidx
  
  sampnum <- seq(length = length(rownames(mSet@rawInMemory@phenoData)));
  retcol <- match("rt", colnames(peakmat));
  intcol <- match(intensity, colnames(peakmat));
  sampcol <- match("sample", colnames(peakmat));
  
  values <- matrix(nrow = length(groupindex), ncol = length(sampnum));
  
  if (method == "medret") {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - stats::median(peakmat[groupindex[[i]],retcol])))]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  } else {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  }
  
  if (value != "index") {
    values <- peakmat[values,value]
    dim(values) <- c(length(groupindex), length(sampnum))
  }
  colnames(values) <- rownames(mSet@rawInMemory@phenoData)
  rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
  
  values
}
.get_closest_index <- function(x, idx, method = c("next", "previous",
                                                  "closest")) {
  method <- match.arg(method)
  switch(method,
         `next` = {
           nxt <- idx > x
           if (any(nxt))
             idx[nxt][1]
           else idx[!nxt][sum(!nxt)]
         },
         `previous` = {
           prv <- idx < x
           if (any(prv))
             idx[prv][sum(prv)]
           else idx[!prv][1]
         },
         closest = {
           dst <- abs(idx - x)
           idx[which.min(dst)]
         })
}


### ---- ===== MatchedFilter Sub Kit ==== ---- #####

.aggregateMethod2int <- function(method = "max") {
  .aggregateMethods <- c(1, 2, 3, 4);
  names(.aggregateMethods) <- c("max", "min", "sum", "mean")
  method <- match.arg(method, names(.aggregateMethods))
  return(.aggregateMethods[method])
}

filtfft <- function(y, filt) {
  
  yfilt <- numeric(length(filt))
  yfilt[seq_along(y)] <- y
  yfilt <- fft(fft(yfilt, inverse = TRUE) * filt)
  
  Re(yfilt[seq_along(y)])
}


### ---- ==== Botthom of this sub Kit ==== --- ####



# 
#########         --------------======== Bottom of this Function Kit =============--------          






# ---------------------------2. Peaks Picking_Section-------------------------------

### Functions_Peak Peaking _ used for parameters optimization
#' @title Data Preparation for ChromPeaking Finding
#' @param object MSnExp object.
#' @noRd
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_prep <- function(object){
  
  ## Check if the data is centroided
  centroided <- all(centroided(object))
  if (is.na(centroided)) {
    suppressWarnings(
      centroided <- isCentroided(
        object[[ceiling(length(object) / 3)]])
    )
  }
  if (is.na(centroided) || !centroided)
    warning("Your data appears to be not centroided! CentWave",
            " works best on data in centroid mode.")
  
  
  ##### Revise for Centwave MSnExp -------------
  
  ## (1) split the object per file from MSnExp.
  object_mslevel_i<-splitByFile(object = object, f = factor(c(seq_along(object@phenoData@data[["sample_name"]]))))
  object_mslevel_o<-bplapply(object_mslevel_i, FUN = function(x){x@assayData}, BPPARAM = MulticoreParam(4))
  
  if (.on.public.web()){
    
    print_mes <- "Data Spliting Finished ! \nPeak Preparing Begin...";    
    write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
    
  } else {
    message("Data Spliting Finished !")
    message("Peak Preparing Begin...")
  }
  
  ## (2) use parallel to do the peak detection.
  
  object_mslevel_name<-bplapply(object_mslevel_o,ls);
  object_mslevell<-object_mslevel<-list();
  for (j in seq_along(object_mslevel_o)){
    for (i in seq_along(object_mslevel_name[[j]])){
      #print(i,j,"\n")
      object_mslevell[[i]]<-object_mslevel_o[[j]][[object_mslevel_name[[j]][i]]]
    }
    names(object_mslevell)<-object_mslevel_name[[j]];
    object_mslevel[[j]]<-object_mslevell;
  }
  
  for (i in seq_along(object_mslevel)){
    #for (j in 1:length(object_mslevel[[i]])){
    ncount<-as.numeric(which(sapply(names(object_mslevel[[i]]),is.na)));
    if (!identical(ncount,numeric(0))){
      object_mslevel[[i]]<-object_mslevel[[i]][-ncount]
    }
    #}
  }
  
  if (.on.public.web()){
    
    print_mes <- "Peak Preparing Done !";    
    write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
    
  } else {
    message("Peak Preparing Done !")
  }
  
  return(object_mslevel)
  
  
}

#' @title Calculate PPS method
#' @description Peak picking method. Specifically used for parameters optimization
#' @param object MSnExp object.
#' @param object_mslevel List, prepared by findChromPeaks_prep function.
#' @param param Parameters list.
#' @param msLevel msLevel. Only 1 is supported currently.
#' @import MSnbase
#' @import BiocParallel
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
PeakPicking_core <-function(object, object_mslevel, param, msLevel = 1L){
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  if(missing(object) | missing(object_mslevel)){
    stop("Peakpicking cannot continue for missing object! CODE: x10001")
  }
  
  ## Restrict to MS 1 data for now.
  if (length(msLevel) > 1)
    stop("Currently only peak detection in a single MS level is ",
         "supported", call. = FALSE)
  
  if (is.null(param$fwhm)){

    resList <- try(BiocParallel::bplapply(object_mslevel,
                        FUN = PeakPicking_centWave_slave,
                        param = param,
                        BPPARAM = SerialParam()),silent = TRUE)

  } else {
    
    resList <- BiocParallel::bplapply(object_mslevel,
                        FUN = PeakPicking_MatchedFilter_slave,
                        param = param,
                        BPPARAM = SerialParam())
    
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if(!.optimize_switch){
    MessageOutput("Peak Profiling Finished !", "\n", NULL)
  }
  
  rm(object_mslevel)
  ## (3) PEAK SUMMARY------------
  
  pks <- vector("list", length(resList))
  for (i in seq_along(resList)) {
    n_pks <- nrow(resList[[i]])
    if (is.null(n_pks))
      n_pks <- 0
    if (n_pks == 0) {
      pks[[i]] <- NULL
      if(!.SwapEnv$.optimize_switch){
        warning("No peaks found in sample number ", i, ".")
      }
    } else {
      pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
    }
    
  } 
  pks <- do.call(rbind, pks)
  
  # Make the chrompeaks embeded
  newFd <- list()
  #newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
  rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
                           seq(from = 1, length.out = nrow(pks)))
  
  newFd$chromPeaks <- pks
  newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
                                              row.names = rownames(pks))
  
  ## mSet Generation
  mSet<-list()
  mSet$msFeatureData <- newFd
  mSet$inMemoryData <- object
  
  return(mSet)
  
}



# -------------------------------3. MS2 Data_Section---------------------------------

# ExtractMS2data <- function(filename, peakParams, mzmin, mzmax){
#   
#   ## Function to extract MSMS spectra 
#   xrms <- readMSData(filename, mode = "onDisk", centroided = TRUE)
#   # get all MS2 spectra from DDA experiment
#   ms2spectra <- spectra(filterMsLevel(xrms, msLevel = 2))  
#   
#   ## Detect chromatographic peaks 
#   # set parameters and find chromatographic peaks 
#   ms1cwp <- CentWaveParam(snthresh = peakParams$sn_thresh, 
#                           noise = 100, ppm = peakParams$ppm, peakwidth = c(peakParams$min_pkw, peakParams$max_pkw))
#   ms1data <- findChromPeaks(xrms, param = ms1cwp, msLevel = 1)
#   
#   #get all peaks 
#   chromPeaks <- chromPeaks(ms1data)
#   
#   # pick one compound 
#   ## find which row contains the mass we want
#   chp <- as.data.frame(chromPeaks)
#   rownum <- which(chp$mz >= mzmin & chp$mz <= mzmax)
#   chromPeak <- chromPeaks[rownum,]
#   
#   #filter out fitting spectra
#   filteredMs2spectra <- .getDdaMS2Scans(chromPeak, ms2spectra)
#   
#   return(filteredMs2spectra)
# }

### Helper function
### function for DDA data 
### Credit: Michael Witting; https://github.com/michaelwitting/metabolomics2018/blob/master/R/msLevelMerge.R
### MS2 spectra need to be list of spectrum2 objects

.getDdaMS2Scans <- function(chromPeak, ms2spectra, mzTol = 0.01, rtTol = 20) {
  
  #make a new list
  filteredMs2spectra <- list()
  
  #isolate only the ones within range
  for(i in seq_along(ms2spectra)) {
    
    #isolate Ms2 spectrum
    ms2spectrum <- ms2spectra[[i]]
    
    #check if within range of peak
    if(abs(chromPeak[4] - ms2spectrum@rt) < rtTol & abs(chromPeak[1] - ms2spectrum@precursorMz) < mzTol) {
      filteredMs2spectra <- c(filteredMs2spectra, ms2spectrum)
    }
  }
  
  #return list with filtered ms2 spectra
  return(filteredMs2spectra)
}




# -----------------------------------4. Annotation-----------------------

##' @references Kuhl C, Tautenhahn R, Boettcher C, Larson TR, Neumann S (2012). 
##' "CAMERA: an integrated strategy for compound spectra extraction and annotation of 
##' liquid chromatography/mass spectrometry data sets." Analytical Chemistry, 84, 283-289. 
##' http://pubs.acs.org/doi/abs/10.1021/ac202450g.

####### ---------- ====== Function Kit From CAMERA ======= ----------- ######/
getPeaks_selection <- function(mSet, method="medret", value="into"){
  
  if (!is(mSet,"mSet")) {
    stop ("Parameter mSet is no mSet object\n")
  }
 
  # Testing if mSet is grouped
  if (nrow(mSet@peakAnnotation$peaks) > 0 && length(fileNames(mSet@rawOnDisk)) > 1) {
    # get grouping information
    groupmat <- mSet@peakAnnotation$groupmat;
    # generate data.frame for peaktable
    ts <- data.frame(cbind(groupmat, groupval(mSet, method=method, value=value)), row.names = NULL)
    #rename column names
    cnames <- colnames(ts)
    if (cnames[1] == 'mzmed') {
      cnames[1] <- 'mz' 
    } else { 
      stop ('Peak information ?!?')
    }
    if (cnames[4] == 'rtmed') {
      cnames[4] <- 'rt' 
    } else {
      stop ('Peak information ?!?')
    }
    colnames(ts) <- cnames
  } else if (length(rownames(mSet@rawOnDisk@phenoData)) == 1) { #Contains only one sample? 
    ts <- mSet@peakAnnotation$peaks;
  } else {
    stop ('First argument must be a xcmsSet with group information or contain only one sample.')
  }
  
  return(as.matrix(ts))
}
groupval <- function(mSet, method = c("medret", "maxint"), value = "index", intensity = "into") {
  
  method <- match.arg(method);
  
  peakmat <- mSet@peakfilling$msFeatureData$chromPeaks;
  groupmat <- mSet@peakAnnotation$groupmat;  
  groupindex <- mSet@peakfilling$FeatureGroupTable$peakidx;
  if(mSet@params[["BlankSub"]]){
    sampnum <- seq(length = length(which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK")))
  } else {
    sampnum <- seq(length = length(rownames(mSet@rawOnDisk@phenoData)))
  }
  
  retcol <- match("rt", colnames(peakmat))
  intcol <- match(intensity, colnames(peakmat))
  sampcol <- match("sample", colnames(peakmat))
  
  values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
  
  if (method == "medret") {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - median(peakmat[groupindex[[i]],retcol])))]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  } else {
    for (i in seq(along = groupindex)) {
      gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
      values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
    }
  }
  
  if (value != "index") {
    values <- peakmat[values,value]
    dim(values) <- c(length(groupindex), length(sampnum))
  }
  
  if(mSet@params[["BlankSub"]]){
    colnames(values) <- rownames(mSet@rawOnDisk@phenoData)[
      mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
  } else {
    colnames(values) <- rownames(mSet@rawOnDisk@phenoData)
  }
  rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
  
  values
}

is.wholenumber <- function(x, tol = .Machine$double.eps^0.5)  abs(x - round(x)) < tol
calcIsotopeMatrix <- function(maxiso=4){
  
  if(!is.numeric(maxiso)){
    stop("Parameter maxiso is not numeric!\n")  
  } else if(maxiso < 1 | maxiso > 8){
    stop(c("Parameter maxiso must between 1 and 8. ",
               "Otherwise use your own IsotopeMatrix.\n"))
  }
  
  isotopeMatrix <- matrix(NA, 8, 4);
  colnames(isotopeMatrix) <- c("mzmin", "mzmax", "intmin", "intmax")
  
  isotopeMatrix[1, ] <- c(1.000, 1.0040, 1.0, 150)
  isotopeMatrix[2, ] <- c(0.997, 1.0040, 0.01, 200)
  isotopeMatrix[3, ] <- c(1.000, 1.0040, 0.001, 200)
  isotopeMatrix[4, ] <- c(1.000, 1.0040, 0.0001, 200)
  isotopeMatrix[5, ] <- c(1.000, 1.0040, 0.00001, 200)
  isotopeMatrix[6, ] <- c(1.000, 1.0040, 0.000001, 200)
  isotopeMatrix[7, ] <- c(1.000, 1.0040, 0.0000001, 200)
  isotopeMatrix[8, ] <- c(1.000, 1.0040, 0.00000001, 200)  
  
  return(isotopeMatrix[seq_len(maxiso), , drop=FALSE])
  
}
findIsotopesPspec <- function(isomatrix, mz, ipeak, int, params){
  #isomatrix - isotope annotations (5 column matrix)
  #mz - m/z vector, contains all m/z values from specific pseudospectrum
  #int - int vector, see above
  #maxiso - how many isotopic peaks are allowed
  #maxcharge - maximum allowed charge
  #devppm - scaled ppm error
  #mzabs - absolut error in m/z
  
  #matrix with all important informationen
  spectra <- matrix(c(mz, ipeak), ncol=2)
  int     <- int[order(spectra[, 1]), , drop=FALSE]
  spectra <- spectra[order(spectra[, 1]), ];    
  cnt     <- nrow(spectra);
  #isomatrix <- matrix(NA, ncol=5, nrow=0)
  #colnames(isomatrix) <- c("mpeak", "isopeak", "iso", "charge", "intrinsic")
  
  #calculate error
  error.ppm <- params$devppm * mz;
  #error.abs <- ),1, function(x) x + params$mzabs*rbind(1,2,3)));
  
  #for every peak in pseudospectrum
  for (j in seq_len(length(mz) - 1)){
    #create distance matrix
    MI <- spectra[j:cnt, 1] - spectra[j, 1];
    #Sum up all possible/allowed isotope distances + error(ppm of peak mz and mzabs)
    max.index <- max(which(MI < (sum(params$IM[seq_len(params$maxiso), "mzmax"]) + error.ppm[j] + params$mzabs )))
    #check if one peaks falls into isotope window
    if(max.index == 1){
      #no promising candidate found, move on
      next;
    }
    
    #IM - isotope matrix (column diffs(min,max) per charge, row num. isotope)
    IM <- t(sapply(seq_len(params$maxcharge),
                   function(x){
      mzmin <- (params$IM[, "mzmin"]) / x;
      mzmax <- (params$IM[, "mzmax"]) / x;
      error <-      (error.ppm[j]+params$mzabs) / x
      res   <- c(0,0);
      for(k in seq_along(mzmin)){
        res <- c(res, mzmin[k]+res[2*k-1], mzmax[k]+res[2*k])
      }
      res[seq(1,length(res),by=2)] <- res[seq(1,length(res),by=2)]-error
      res[seq(2,length(res),by=2)] <- res[seq(2,length(res),by=2)]+error
      return (res[-seq_len(2)])
    } ))
    
    #Sort IM to fix bug, with high ppm and mzabs values 
    #TODO: Find better solution and give feedback to user!
    IM <- t(apply(IM,1,sort))
    
    #find peaks, which m/z value is in isotope interval
    hits <- t(apply(IM, 1, function(x){ findInterval(MI[seq_len(max.index)], x)}))
    rownames(hits) <- c(seq_len(nrow(hits)))
    colnames(hits) <- c(seq_len(ncol(hits)))
    hits[which(hits==0)] <-NA
    hits <- hits[, -1, drop=FALSE]
    hits.iso <- hits%/%2 + 1;
    
    
    #check occurence of first isotopic peak
    for(iso in seq_len(min(params$maxiso,ncol(hits.iso)))){
      hit <- apply(hits.iso,1, function(x) any(naOmit(x)==iso))
      hit[which(is.na(hit))] <- TRUE
      if(all(hit)) break;
      hits.iso[!hit,] <- t(apply(hits.iso[!hit,,drop=FALSE],1, function(x) {
        if(!all(is.na(x))){
          ini <- which(x > iso)
          if(all(!is.infinite(ini)) && length(ini) > 0){
            x[min(ini):ncol(hits.iso)] <- NA  
          }
        }
        x
      }))
    }
    
    #set NA to 0
    hits[which(is.na(hits.iso))] <- 0
    #check if any isotope is found
    hit <- apply(hits, 1, function(x) sum(x)>0)
    #drop nonhits  
    hits <- hits[hit, , drop=FALSE]
    
    #if no first isotopic peaks exists, next
    if(nrow(hits) == 0){
      next;
    }
    
    #getting max. isotope cluster length
    #TODO: unique or not????
    #isolength <- apply(hits, 1, function(x) length(which(unique(x) %% 2 !=0)))
    #isohits - for each charge, length of peak within intervals
    isohits   <- lapply(seq_len(nrow(hits)), function(x) which(hits[x, ] %% 2 !=0))
    isolength <- sapply(isohits, length)
    
    #Check if any result is found
    if(all(isolength==0)){
      next;
    }
    
    #itensity checks
    #candidate.matrix
    #first column - how often succeded the isotope intensity test
    #second column - how often could a isotope int test be performed
    candidate.matrix <- matrix(0, nrow=length(isohits), ncol=max(isolength)*2);
    
    for(iso in seq_along(isohits)){
      for(candidate in seq_along(isohits[[iso]])){
        for(sample.index in c(seq_len(ncol(int)))){
          #Test if C12 Peak is NA
          if(!is.na(int[j, sample.index])){              
            #candidate.matrix[maxIso, 1] <- candidate.matrix[maxIso, 1] + 1
          }
          charge <- as.numeric(row.names(hits)[iso])
          int.c12 <- int[j, sample.index]
          isotopePeak <- hits[iso,isohits[[iso]][candidate]]%/%2 + 1;
          if(isotopePeak == 1){
            #first isotopic peak, check C13 rule
            int.c13 <- int[isohits[[iso]][candidate]+j, sample.index];
            int.available <- all(!is.na(c(int.c12, int.c13)))
            if (int.available){
              theo.mass <- spectra[j, 1] * charge; #theoretical mass
              numC      <- abs(round(theo.mass / 12)); #max. number of C in molecule
              inten.max <- int.c12 * numC * 0.011; #highest possible intensity
              inten.min <- int.c12 * 1    * 0.011; #lowest possible intensity
              if((int.c13 < inten.max && int.c13 > inten.min) || !params$filter){
                candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }else{
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }
            } else {
              #todo
            } 
          } else {
            #x isotopic peak
            int.cx <- int[isohits[[iso]][candidate]+j, sample.index];
            int.available <- all(!is.na(c(int.c12, int.cx)))
            if (int.available) {
              intrange <- c((int.c12 * params$IM[isotopePeak,"intmin"]/100),
                            (int.c12 * params$IM[isotopePeak,"intmax"]/100))
              #filter Cx isotopic peaks muss be smaller than c12
              if(int.cx < intrange[2] && int.cx > intrange[1]){
                candidate.matrix[iso,candidate * 2 - 1] <- candidate.matrix[iso,candidate * 2 - 1] + 1
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1                        
              }else{
                candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
              }
            } else {
              candidate.matrix[iso,candidate * 2 ] <- candidate.matrix[iso,candidate * 2] + 1
            }#end int.available
          }#end if first isotopic peak
        }#for loop samples
      }#for loop candidate
    }#for loop isohits
    
    #calculate ratios
    candidate.ratio <- candidate.matrix[, seq(from=1, to=ncol(candidate.matrix),
                                              by=2)] / candidate.matrix[, seq(from=2, 
                                                                              to=ncol(candidate.matrix), by=2)];
    if(is.null(dim(candidate.ratio))){
      candidate.ratio <- matrix(candidate.ratio, nrow=nrow(candidate.matrix))
    }
    if(any(is.nan(candidate.ratio))){
      candidate.ratio[which(is.nan(candidate.ratio))] <- 0;
    }
    
    #decision between multiple charges or peaks
    for(charge in seq_len(nrow(candidate.matrix))) {
      if(any(duplicated(hits[charge, isohits[[charge]]]))){
        #One isotope peaks has more than one candidate
        ##check if problem is still consistent
        for(iso in unique(hits[charge, isohits[[charge]]])){
          if(length(index <- which(hits[charge, isohits[[charge]]]==iso))== 1){
            #now duplicates next
            next;
          }else{
            #find best
            index2 <- which.max(candidate.ratio[charge, index]);
            save.ratio <- candidate.ratio[charge, index[index2]]
            candidate.ratio[charge,index] <- 0
            candidate.ratio[charge,index[index2]] <- save.ratio
            index <- index[-index2]
            isohits[[charge]] <- isohits[[charge]][-index]
          }
        }
      }#end if
      
      for(isotope in seq_len(ncol(candidate.ratio))) {
        if(candidate.ratio[charge, isotope] >= params$minfrac){
          isomatrix <- rbind(isomatrix, 
                             c(spectra[j, 2],
                               spectra[isohits[[charge]][isotope]+j, 2], 
                               isotope, as.numeric(row.names(hits)[charge]), 0))
        } else{
          break;
        }
      }
    }#end for charge
  }#end for j
  
  return(isomatrix)
}
resolveFragmentConnections <- function(hypothese){
  #Order hypothese after mass
  hypothese <- hypothese[order(hypothese[, "mass"], decreasing=TRUE), ]
  
  for(massgrp in unique(hypothese[, "massgrp"])){
    index <- which(hypothese[, "massgrp"] == massgrp & !is.na(hypothese[, "parent"]))
    if(length(index) > 0) {
      index2 <- which(hypothese[, "massID"] %in% hypothese[index, "massID"] & hypothese[, "massgrp"] != massgrp)
      if(length(index2) > 0){
        massgrp2del <- which(hypothese[, "massgrp"] %in% unique(hypothese[index2, "massgrp"]))
        hypothese <- hypothese[-massgrp2del, ]
      }
    }
  }
  return(hypothese)
}
addFragments <- function(hypothese, rules, mz){
  #check every hypothese grp
  fragments <- rules[which(rules[, "typ"] == "F"), , drop=FALSE]
  hypothese <- cbind(hypothese, NA);
  colnames(hypothese)[ncol(hypothese)] <- "parent"
  if(nrow(fragments) < 1){
    #no fragment exists in rules
    return(hypothese)
  }
  
  orderMZ <- cbind(order(mz),order(order(mz)))
  sortMZ <- cbind(mz,seq_along(mz))
  sortMZ <- sortMZ[order(sortMZ[,1]),]
  
  for(massgrp in unique(hypothese[, "massgrp"])){
    for(index in which(hypothese[, "ruleID"] %in% unique(fragments[, "parent"]) & 
                       hypothese[, "massgrp"] == massgrp)){
      massID <- hypothese[index, "massID"]
      ruleID <- hypothese[index, "ruleID"]
      indexFrag <- which(fragments[, "parent"] == ruleID)
      
      while(length(massID) > 0){
        result <- fastMatchR(sortMZ[seq_len(orderMZ[massID[1],2]),1], mz[massID[1]] + 
                              fragments[indexFrag, "massdiff"], tol=0.05)
        invisible(sapply(seq_len(orderMZ[massID[1],2]), function(x){
          if(!is.null(result[[x]])){
            massID <<- c(massID, orderMZ[x,1]);        
            indexFrags <- indexFrag[result[[x]]];
            tmpRes <- cbind(orderMZ[x,1], as.numeric(rownames(fragments)[indexFrags]), fragments[indexFrags, c("nmol", "charge")],
                            hypothese[index, "mass"], fragments[indexFrags, c("score")],
                            massgrp, 1, massID[1], deparse.level=0)
            colnames(tmpRes) <- colnames(hypothese)
            hypothese <<- rbind(hypothese, tmpRes);
          }
        }))
        massID <- massID[-1];
      }    
    }
  }
  return(hypothese)
}
create.matrix <- function(dim1,dim2) {
  x <- matrix()
  length(x) <- dim1*dim2
  dim(x) <- c(dim1,dim2)
  x
}
getAllPeakEICs <- function(mSet, index=NULL){
  
  #Checking parameter index
  if(is.null(index)){
    stop("Parameter index is not set.\n")
  }else if(length(index) != nrow(mSet@peakAnnotation$AnnotateObject$groupInfo)){
    stop("Index length must equals number of peaks.\n")
  }
  
  allfiles <- mSet@rawOnDisk@processingData@files;
  
  if(mSet@params$BlankSub){
    blkidx <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK")
    nfiles <- length(blkidx)
    allfiles <- allfiles[blkidx]
  } else {
    nfiles <- length(allfiles)
  }
  
  scantimes <- list()
  
  if(nfiles == 1){
    #Single sample experiment
    if (file.exists(allfiles[1])) {
      
      mSet_splits <- mSet@rawOnDisk
      scantimes[[1]] <- scan_length <- sapply(mSet_splits, length)
      maxscans <- max(scan_length)
      mset <-list();
      #xraw <- xcmsRaw(filepaths(mSet$xcmsSet)[1],profstep=0)
      #maxscans <- length(xraw@scantime)
      #scantimes[[1]] <- xraw@scantime
      mset@rawOnDisk <- mSet_splits
      
      pdata <- as.data.frame(mSet@peakAnnotation$peaks) 
      
      EIC <- create.matrix(nrow(pdata),maxscans)
      EIC[,] <- getEIC4Peaks(mset,pdata,maxscans) #getEIC4Peaks(xraw,pdata,maxscans)
      
    } else {
      stop('Raw data file:',mSet$xcmsSet@filepaths[1],' not found ! \n');
    }
  } else {
    #Multiple sample experiment
    gval <- groupval(mSet);
    MessageOutput(paste0('Generating EIC\'s.'),"",NULL)
    
    #na flag, stores if sample contains NA peaks
    na.flag <- 0;   maxscans <- 0;
    mSet_splits <- split(mSet@rawOnDisk,fromFile(mSet@rawOnDisk));
    if(mSet@params$BlankSub){
      blkidx <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK");
      mSet_splits <- mSet_splits[blkidx];
    }
    MessageOutput(".","",NULL)
    
    if (file.exists(allfiles[1])) { 
      
      scan_length <- sapply(mSet_splits, length)
      maxscans <- max(scan_length)
      MessageOutput(".","",NULL)
      
    } else {      
      stop('Raw data file:',allfiles[1],' not found ! \n');    
    }
    
    #generate EIC Matrix
    EIC <- create.matrix(nrow(gval),maxscans)
    mset <-list();mset$env <- new.env(parent=.GlobalEnv)
    MessageOutput(".","\n",NULL)
    
    #loop over all samples
    for (f in seq_len(nfiles)){
      MessageOutput(paste("Detecting ",basename(allfiles)[f]," ... "),"",NULL)

      #which peaks should read from this sample    
      idx.peaks <- which(index == f);
      
      #check if we need data from sample f
      if(length(idx.peaks) == 0){
        MessageOutput(paste0("0 peaks found!"),"\n",NULL)
        next;
      }
      
      #check if raw data file of sample f exists
      if (file.exists(allfiles[f])) {
        
        #read sample        
        scantimes[[f]] <- scan_length[f];        
        pdata <- as.data.frame(mSet@peakAnnotation$peaks[gval[idx.peaks,f],,drop=FALSE]) # data for peaks from file f
        
        #Check if peak data include NA values
        if(length(which(is.na(pdata[,1]))) > 0){na.flag <- 1;}
        mset$onDiskData <- mSet_splits[[f]];
        
        #Generate raw data according to peak data
        EIC[idx.peaks,] <- getEIC4Peaks(mset, pdata, maxscans);
        
      } else {
        stop('Raw data file:',allfiles[f],' not found ! \n')
      }
    }
    
    if(na.flag ==1){
      MessageOutput("Warning: Found NA peaks in selected sample.", "\n",NULL);
    }
  }
  invisible(list(scantimes=scantimes,EIC=EIC)); 
}
fastMatchR <- function(x,y,tol=0.001, symmetric=FALSE) {
  
  if (any(is.na(y)))
    stop("NA's are not allowed in y !\n")
  ok <- !(is.na(x))
  ans <- order(x)
  keep <- seq_along(ok)[ok]
  xidx <- ans[ans %in% keep]
  xs <- x[xidx]
  yidx <- order(y)
  ys <- y[yidx] 
  if (!is.double(xs)) 
    xs <- as.double(xs)
  if (!is.double(ys)) 
    ys<- as.double(ys)
  if (!is.integer(xidx)) 
    xidx <- as.integer(xidx)
  if (!is.integer(yidx)) 
    yidx <- as.integer(yidx)  
  
  fm <- .Call("fastMatch", xs, ys, xidx, yidx, as.integer(length(x)), as.double(tol) , PACKAGE ='OptiLCMS') 
  fm2 <- vector("list", length=length(fm))
  #stop("!")
  if (symmetric){  
    for (a in seq_along(fm)) {
      if (!is.null(fm[[a]][1])){
        tmp<-NULL
        for (b in seq_along(fm[[a]])){
          if ((abs(x[a]-y[fm[[a]]][b]) == min(abs(x[a]-y[fm[[a]][b]]),
                                              abs(x[a]  -y[fm[[a]][b]-1]),
                                              abs(x[a]  -y[fm[[a]][b]+1]),
                                              abs(x[a-1]-y[fm[[a]][b]]),
                                              abs(x[a+1]-y[fm[[a]][b]]), na.rm=TRUE)
          )) {
            tmp<-c(tmp, fm[[a]][b])
          }
        }
        fm2[[a]]<-tmp
      }
    }
  }else {
    fm2 <- fm}
  fm2
}
combineCalc <- function(object1, object2, method = "sum"){
  
  if(ncol(object1) != 4){
    stop("first object is not a matrix with 4 columns");
  }
  if(ncol(object2) != 4){
    stop("second object is not a matrix with 4 columns");
  }
  
  combination = new.env(hash = TRUE)
  
  apply(object1,1,function(x){ 
    combination[[paste(x[c(1,2,4)],collapse=" ")]]<- x[3] 
  })
  
  apply(object2,1,function(x){
    if(is.null(combination[[paste(x[c(1,2,4)],collapse=" ")]])){
      combination[[paste(x[c(1,2,4)],collapse=" ")]]<- x[3]
    }else{
      combination[[paste(x[c(1,2,4)],collapse=" ")]]<- combination[[paste(x[c(1,2,4)],collapse=" ")]] + x[3];
    }
  })
  
  if(!is.null(combination[["NA NA NA"]])){
    rm("NA NA NA",envir=combination)
  }
  
  resMat <- matrix(ncol=4, nrow=length(ls(combination)));
  
  i<-1;y<-c();
  sapply(ls(combination), function(x) {
    y[c(1,2,4)] <- unlist(strsplit(x," "));
    y[3] <- combination[[x]];
    resMat[i,] <<- y; i<<-i+1;
  })
  resMat <- matrix(as.numeric(resMat), ncol=4);
  colnames(resMat) <- c("x","y","cor","ps")
  
  return(invisible(resMat));
}
mpi.comm.size <- function(){
  return(1)
}

#' calcCiS
#' @noRd
#' @importFrom Hmisc rcorr

calcCiS<- function(mSet, EIC=EIC, corval=0.75, pval=0.05, psg_list=NULL ){
  
  #Columns Peak 1, Peak 2, correlation coefficienct, Pseudospectrum Index
  resMat <- create.matrix(100000,4);
  colnames(resMat) <- c("x","y","cor","ps")
  cnt <- 0;
  
  npeaks <- nrow(mSet@peakAnnotation$AnnotateObject$groupInfo);
  
  ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
  npeaks.global <- 0; #Counter for % bar
  npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
  
  #Check if object have been preprocessed with groupFWHM
  if(npspectra < 1){
    npspectra <- 1;
    mSet@peakAnnotation$AnnotateObject$pspectra[[1]] <- seq_len(nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
    MessageOutput(paste0('Calculating peak correlations in 1 group.'),"\n",NULL)

    lp <- -1;
    pspectra_list     <- 1;
    mSet@peakAnnotation$AnnotateObject$psSamples  <- 1;
  }else{
    if(is.null(psg_list)){
      MessageOutput(paste('Calculating peak correlations in',npspectra,'Groups... '),"\n",NULL);
      lp <- -1;
      pspectra_list <- seq_len(npspectra);
    }else{
      MessageOutput(paste('Calculating peak correlations in',length(psg_list),'Groups... '),"\n",NULL);
      lp <- -1;
      pspectra_list <- psg_list;
      ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list],length));
    }
  }
  
  if(dim(EIC)[2] != npeaks){
    EIC <- t(EIC);
    #Second check, otherwise number of peaks != number of EIC curves
    if(dim(EIC)[2] != npeaks){
      stop(c("Wrong dimension of EIC. It has ",dim(EIC)[1]," Rows for ",npeaks,"peaks"));
    }
  }
  lp <- -1;
  #Iterate over all PS-spectra
  pb <- progress_bar$new(format = "Generating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = TRUE, width= 75)
  
  for(j in seq_along(pspectra_list)){
    i  <- pspectra_list[j];
    pi <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
    npi <- length(pi);
    
    if( ((npi^2)/2 + cnt)  >= nrow(resMat)){
      #resize resMat
      resMat <- rbind(resMat,create.matrix(100000,4));
    }
    pb$tick();
    #Need at least two peaks for correlation
    if(npi < 2) {
      next;
    }
    
    #Suppress warnings
    res <- suppressWarnings(Hmisc::rcorr(EIC[, pi], type="pearson"));
    
    #Set lower triangle to NA
    res$r[lower.tri(res$r,diag = TRUE)] <- NA;
    res$P[lower.tri(res$P,diag = TRUE)] <- NA;
    
    #Find peaks with have correlation higher corr_threshold and p <= 0.05
    index <- which(( res$r > corval) & (res$P <= pval))
    if( (length(index) + cnt)  >= nrow(resMat)){
      #resize resMat
      resMat <- rbind(resMat, create.matrix(max(length(index)+1,100000),4));
    }
    
    if(length(index) > 0){
      for(x in seq_along(index)){
        col <- index[x] %/% npi + 1;
        row <- index[x] %%  npi;
        if(row == 0){
          row <- npi;
          col <- col - 1;
        }
        xi <- pi[row];
        yi <- pi[col];
        #y > x should be satisfied for every value, since we have set lower.tri to NA
        cnt<-cnt+1;
        resMat[cnt,] <- c(xi,yi,res$r[row, col],i);
      }
    }else{
      next;
    }
  }
  
  return(invisible(resMat[seq_len(cnt),,drop=FALSE]))
}
calcPC.hcs <- function(mSet, ajc=NULL,psg_list=NULL) {
  
  npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
  pspectra  <- mSet@peakAnnotation$AnnotateObject$pspectra
  psSamples <- mSet@peakAnnotation$AnnotateObject$psSamples;
  npeaks.global <- 0;
  ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
  colnames(ajc)[3] <- c("weight") ##todo: Change to generell ajc interface
  
  #Information for % output
  if(is.null(psg_list)){
    MessageOutput(paste('Calculating graph cross linking in', npspectra, 'Groups...'), "\n", NULL)
    
    lperc <- -1;
    pspectra_list <- seq_len(npspectra);
    ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));
  }else{
    MessageOutput(paste('Calculating graph cross linking in', npspectra, 'Groups...'), "\n", NULL)
    lperc <- -1;
    pspectra_list <- psg_list;
    ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list], length));
  }
  
  #peak counter
  npeaks <- 0;
  lp <- -1;
  pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(pspectra_list), clear = TRUE, width= 75)
  
  for(j in seq_along(pspectra_list)){
    i  <- pspectra_list[j];#index of pseudospectrum
    pi <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]]; #peak_id in pseudospectrum
    names(pi) <- as.character(pi);
    
    #percent output
    pb$tick();
    #end percent output
    
    index <- which(ajc[,4] == i)
    if(length(index) > 200000) next;
    if(length(index) < 1){
      g <- ftM2graphNEL(matrix(nrow=0,ncol=2),V=as.character(pi),edgemode="undirected")
    }else{
      g <- ftM2graphNEL(ajc[index,seq_len(2),drop=FALSE],W=ajc[index,3,drop=FALSE], V=as.character(pi), edgemode="undirected");
    }
    hcs <- highlyConnSG(g);
    
    #order cluster after size
    cnts <- sapply(hcs$clusters,length);
    grps <- seq_along(hcs$clusters);     
    grps <- grps[order(cnts, decreasing = TRUE)]
    
    for (ii in seq_along(grps)){
      if(ii==1){
        #save old pspectra number
        pspectra[[i]] <- as.integer(hcs$cluster[[grps[ii]]])
        pi[hcs$cluster[[grps[ii]]]] <- NA
      } else {
        npspectra <- npspectra +1
        pspectra[[npspectra]] <- as.integer(hcs$cluster[[grps[ii]]])
        pi[hcs$cluster[[grps[ii]]]] <- NA
        psSamples[npspectra] <- psSamples[i]
      }
    }
    index <- which(!is.na(pi));
    if(length(index)>0){
      for(ii in seq_along(index)){
        npspectra <- npspectra +1
        pspectra[[npspectra]] <- pi[index[ii]]
        psSamples[npspectra] <- psSamples[i]
      }
    }
  }
  
  mSet@peakAnnotation$AnnotateObject$pspectra  <- pspectra;
  mSet@peakAnnotation$AnnotateObject$psSamples <- psSamples;
  
  MessageOutput(paste("New number of ps-groups: ",length(pspectra)), "\n", NULL)
  return(mSet)
}
findAdducts <- function(mSet, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, rules=NULL, 
                        max_peaks=100, psg_list=NULL, intval="maxo",maxcharge){
  
  if(!is.vector(rules) && !is.null(rules)){
    stop("Your customized adducts rules are wrong! It should a vector like c('[M-H]-','[M-2H]2-','[M-3H]3-')");
  }
  multFragments=FALSE;
  # Scaling ppm factor
  devppm <- ppm / 1000000;
  # counter for % bar
  npeaks.global <- 0;
  
  # get mz values from peaklist
  imz <- mSet@peakAnnotation$AnnotateObject$groupInfo[, "mz"];
  
  #number of pseudo-spectra
  npspectra <- length(mSet@peakAnnotation$AnnotateObject$pspectra);
  
  #If groupCorr or groupFHWM have not been invoke, select all peaks in one sample
  if(npspectra < 1){ 
    npspectra <- 1;
    mSet@peakAnnotation$AnnotateObject$pspectra[[1]] <- seq_len(nrow(mSet@peakAnnotation$AnnotateObject$groupInfo)); 
  }
  
  if(mSet@peakAnnotation$AnnotateObject$sample == 1 && length(rownames(pData(mSet@rawOnDisk))) == 1){
    ##one sample case 
    mint <- mSet@peakAnnotation$AnnotateObject$groupInfo[, intval];
  }else{
    ##multiple sample
    if(is.na(mSet@peakAnnotation$AnnotateObject$sample[1])){
      if(mSet@params[["BlankSub"]]){
        index <- seq_along(which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"))
      } else {
        index <- seq_along(mSet@rawOnDisk@processingData@files);
      }
    }else{
      index <- mSet@peakAnnotation$AnnotateObject$sample;
    }
    
    MessageOutput(paste0("Generating peak matrix for peak annotation!"), "\n", NULL)

    mint     <- groupval(mSet,value=intval)[, index, drop=FALSE];
    peakmat  <- mSet@peakAnnotation$peaks;
    groupmat <- mSet@peakAnnotation$groupmat;
    
    imz <- groupmat[, "mzmed"];
    irt <- groupmat[, "rtmed"];
    int.val <- c();
    nsample <- length(mSet@peakAnnotation$AnnotateObject$sample);
  }
  
  # isotopes
  isotopes  <- mSet@peakAnnotation$AnnotateObject$isotopes;
  # adductlist
  derivativeIons <- vector("list", length(imz));
  
  # other variables
  oidscore <- c();
  index    <- c();
  annoID   <- matrix(ncol=4, nrow=0)
  annoGrp  <- matrix(ncol=4, nrow=0)
  colnames(annoID)  <-  colnames(mSet@peakAnnotation$AnnotateObject$annoID)
  colnames(annoGrp) <-  colnames(mSet@peakAnnotation$AnnotateObject$annoGrp)
  
  ##Examine polarity and rule set
  if(!(mSet@peakAnnotation$AnnotateObject$polarity == "")){
    
    MessageOutput(paste("Polarity is set in annotaParam:", mSet@peakAnnotation$AnnotateObject$polarity),"\n",NULL)

    if(is.null(rules)){
      if(!is.null(mSet@peakAnnotation$AnnotateObject$ruleset)){
        rules <- mSet@peakAnnotation$AnnotateObject$ruleset;
      }else{
        
        MessageOutput(paste0("Ruleset could not read from object! Recalculating..."),"\n",NULL)
        
        rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                           polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]], 
                           lib.loc= .libPaths(), multFragments=multFragments);
        mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
      }
    }else{ 
      
      rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                         polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]], 
                         lib.loc= .libPaths(), multFragments=multFragments);
      
      rrowidx <- vector();
      for (r in rules){
        rrowidx <- c(rrowidx, which(r == rule0$name))
      }
      
      mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
      MessageOutput("Found and use user-defined ruleset!","\n",NULL)
    }
    
  } else {
    
    if(!is.null(polarity)){
      if(polarity %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, 
                             nh=2, polarity=polarity, lib.loc= .libPaths(),
                             multFragments=multFragments);
          #save ruleset
          mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
        }else{
          rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                             polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]], 
                             lib.loc= .libPaths(), multFragments=multFragments);
          rrowidx <- vector();
          for (r in rules){
            rrowidx <- c(rrowidx, which(r == rule0$name))
          }
          
          mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
          MessageOutput("Found and use user-defined ruleset!","\n",NULL)
          }
        mSet@peakAnnotation$AnnotateObject$polarity <- polarity;
      }else stop("polarity mode unknown, please choose between positive and negative.")
      
    }else if(length(mSet@peakAnnotation$AnnotateObject$polarity) > 0){
      
      index <- which(pData(mSet@rawOnDisk)[[1]] == mSet@peakAnnotation$AnnotateObject$category)[1] + 
        mSet@peakAnnotation$AnnotateObject$sample-1;
      if(mSet@peakAnnotation$AnnotateObject$polarity[index] %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, 
                             nh=2, polarity=mSet$xcmsSet@polarity[index], 
                             lib.loc= .libPaths(), multFragments=multFragments);
          #save ruleset
          mSet@peakAnnotation$AnnotateObject$ruleset <- rules;
        } else {
          rule0 <- calcRules(maxcharge=maxcharge, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                             polarity=mSet@peakAnnotation[["AnnotateObject"]][["polarity"]], 
                             lib.loc= .libPaths(), multFragments=multFragments);
          rrowidx <- vector();
          for (r in rules){
            rrowidx <- c(rrowidx, which(r == rule0$name))
          }
          
          mSet@peakAnnotation$AnnotateObject$ruleset <- rule0[rrowidx, ];
          MessageOutput("Found and use user-defined ruleset!","\n",NULL)
        }
        mSet@peakAnnotation$AnnotateObject$polarity <- polarity;
      }else stop("polarity mode in xcmsSet unknown, please define variable polarity.")
      
    }else stop("polarity mode could not be estimated from the xcmsSet, please define variable polarity!")
    
  }
  
  mSet@peakAnnotation$AnnotateObject$ruleset -> rules;
  
  ##Run as single or parallel mode
  runParallel <- 0;

  if(mSet@peakAnnotation$AnnotateObject$runParallel$enable == 1){
    if(!(is.null(mSet@peakAnnotation$AnnotateObject$runParallel$cluster)) || mpi.comm.size() > 0 ){
      runParallel <- 1;
    }else{
      warning("CAMERA runs in parallel mode, but no slaves are spawned!\nRun in single core mode!\n");
      runParallel <- 0;
    }
  }else{
    runParallel <- 0;
  }

  if("quasi" %in% colnames(rules)){
    #backup for old rule sets
    quasimolion <- which(rules[, "quasi"]== 1) 
  }else{
    quasimolion <- which(rules[, "mandatory"]== 1)
  }
  
  #Remove recognized isotopes from annotation m/z vector
  for(x in seq(along = isotopes)){
    if(!is.null(isotopes[[x]])){
      if(isotopes[[x]]$iso != 0){
        imz[x] <- NA;
      }
    }
  }
  
  #counter for % bar
  npeaks    <- 0; 
  massgrp   <- 0;
  ncl <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra, length));  
  
  if (runParallel == 1) { ## ... we run in parallel mode
    # if(is.null(psg_list)){
    #   MessageOutput(paste('Calculating possible adducts in',npspectra,'Groups... '), "\n", NULL)
    # 
    #   lp <- -1;
    #   pspectra_list <- 1:npspectra;
    # }else{
    #   MessageOutput(paste('Calculating possible adducts in',length(psg_list),'Groups... '), "\n", NULL)
    #   lp <- -1;
    #   pspectra_list <- psg_list;
    # }
    # 
    # argList <- list();
    # 
    # cnt_peak <- 0;
    # if(is.null(max_peaks)){
    #   max_peaks=100;
    # }
    # paramsGlobal <- list();
    # if("typ" %in% colnames(rules)){
    #   rules.idx <- which(rules[, "typ"]== "A")
    #   parent <- TRUE;
    # }else{
    #   #backup for old rule sets
    #   rules.idx <- 1:nrow(rules);
    #   parent <- FALSE;
    # }
    # 
    # #Add params to env
    # paramsGlobal <- list()
    # paramsGlobal$pspectra <- mSet@peakAnnotation$AnnotateObject$pspectra;
    # paramsGlobal$imz <- imz;
    # paramsGlobal$rules <- rules;
    # paramsGlobal$mzabs <- mzabs;
    # paramsGlobal$devppm <- devppm;
    # paramsGlobal$isotopes <- isotopes;
    # paramsGlobal$quasimolion <- quasimolion;
    # paramsGlobal$parent <- parent;
    # paramsGlobal$rules.idx <- rules.idx;
    # #create params
    # #paramsGlobal <- list2env(params)
    # 
    # params <- list();
    # 
    # for(j in 1:length(pspectra_list)){
    #   i <- pspectra_list[j];
    #   params$i[[length(params$i)+1]] <- i;
    #   cnt_peak <- cnt_peak+length(mSet@peakAnnotation$AnnotateObject$pspectra[[i]]);
    #   if(cnt_peak > max_peaks || j == length(pspectra_list)){
    #     argList[[length(argList)+1]] <- params
    #     cnt_peak <- 0;
    #     params <- list();
    #   }
    # }
    # 
    # #Some informationen for the user
    # cat(paste("Parallel mode: There are",length(argList), "tasks.\n"))
    # 
    # if(is.null(mSet@peakAnnotation$AnnotateObject$runParallel$cluster)){
    #   #Use MPI
    #   #result <- xcmsPapply(argList, annotateGrpMPI2, paramsGlobal)
    # }else{
    #   #For snow
    #   #result <- xcms:::xcmsClusterApply(cl=mSet$AnnotateObject$runParallel$cluster, 
    #   #                                  x=argList, fun=annotateGrpMPI, 
    #   #                                  msgfun=msgfun.snowParallel,
    #   #                                  paramsGlobal)
    # }
    # 
    # for(ii in 1:length(result)){
    #   if(length(result[[ii]]) == 0){
    #     next;
    #   }
    #   for(iii in 1:length(result[[ii]])){
    #     hypothese <- result[[ii]][[iii]];
    #     if(is.null(hypothese)){
    #       next;
    #     }
    #     charge <- 0;
    #     old_massgrp <- 0;
    #     index <- argList[[ii]]$i[[iii]];
    #     ipeak <- mSet@peakAnnotation$AnnotateObject$pspectra[[index]];
    #     for(hyp in 1:nrow(hypothese)){
    #       peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
    #       if(old_massgrp != hypothese[hyp,"massgrp"]) {
    #         massgrp <- massgrp+1;
    #         old_massgrp <- hypothese[hyp,"massgrp"];
    #         annoGrp <- rbind(annoGrp,c(massgrp,hypothese[hyp,"mass"],
    #                                    sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp),"score"]),i) ) 
    #       }
    #       
    #       if(parent){
    #         annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID","parent")]))  
    #       }else{
    #         annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID")],NA))
    #       }
    #       
    #     }
    #   }
    # }
    # 
    # derivativeIons <- getderivativeIons(annoID,annoGrp,rules,length(imz));
    # 
    # mSet@peakAnnotation$AnnotateObject$derivativeIons <- derivativeIons;
    # mSet@peakAnnotation$AnnotateObject$annoID  <- annoID;
    # mSet@peakAnnotation$AnnotateObject$annoGrp <- annoGrp;
    # return(object)
  } else {
    ##Parallel Core Mode
    if(is.null(psg_list)){
      MessageOutput(paste('Calculating possible adducts in',npspectra,'Groups... '),"\n",NULL); 
      lp <- -1;
      pspectra_list <- seq_len(npspectra);
    }else{
      MessageOutput(paste('Calculating possible adducts in',length(psg_list),'Groups... '),"\n",NULL); 
      lp <- -1;
      pspectra_list <- psg_list;
      sum_peaks <- sum(sapply(mSet@peakAnnotation$AnnotateObject$pspectra[psg_list],length));
    }
    
    if("typ" %in% colnames(rules)){
      rules.idx <- which(rules[, "typ"]== "A")
      parent <- TRUE;
    }else{
      #backup for old rule sets
      rules.idx <- seq_len(nrow(rules));
      parent <- FALSE;
    }
    along = pspectra_list;
    pb <- progress_bar$new(format = "Calculating [:bar] :percent Time left: :eta", total = length(along), clear = TRUE, width= 75)
    
    for(j in seq(along)){
      i <- pspectra_list[j];
      
      #peak index for those in pseudospectrum i
      ipeak <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
      
      #percent output
      pb$tick();
      #end percent output
      
      #check if the pspec contains more than one peak 
      if(length(ipeak) > 1){
        
        hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx=rules.idx);
        #save results
        if(is.null(hypothese)){
          next;
        }
        charge <- 0;
        old_massgrp <- 0;
        
        #combine annotation hypotheses to annotation groups for one compound mass
        for(hyp in seq_len(nrow(hypothese))){
          peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
          if(old_massgrp != hypothese[hyp, "massgrp"]) {
            massgrp <- massgrp + 1;
            old_massgrp <- hypothese[hyp, "massgrp"];
            annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"], 
                                        sum(hypothese[which(hypothese[, "massgrp"] == old_massgrp), "score"]), i) ) 
          }
          if(parent){
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], ipeak[hypothese[hyp, "parent"]]))
          }else{
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], NA))
          }
        }
      }
    }
    
    derivativeIons <- getderivativeIons(annoID, annoGrp, rules, length(imz));

    mSet@peakAnnotation$AnnotateObject$derivativeIons <- derivativeIons;
    mSet@peakAnnotation$AnnotateObject$annoID  <- annoID;
    mSet@peakAnnotation$AnnotateObject$annoGrp <- annoGrp;
    return(mSet)
  }
}
getPeaklist <- function(mSet, intval="into") {
  
  if (! intval %in% colnames(mSet@peakAnnotation$peaks)) {
    stop("unknown intensity value!")
  }
  #generate peaktable
  #Check if xcmsSet contains only one sample
  if(mSet@peakAnnotation$AnnotateObject$sample == 1 && length(rownames(mSet@rawOnDisk@phenoData)) == 1){
    #intval is here ignored since all intensity values are already contained
    peaktable <- mSet@peakAnnotation$AnnotateObject$groupInfo;
  }else {
    #Case of xcmsSet with multiple samples
    #Use groupInfo information and replace intensity values
    peaktable <- mSet@peakAnnotation$AnnotateObject$groupInfo;
    
    #get intensity values from xcmsSet
    grpval <- groupval(mSet, value=intval);
    #get column range for replacement
    grpval.ncol <- ncol(grpval)
    start <- ncol(peaktable) - grpval.ncol +1;
    ende  <- start + grpval.ncol - 1; 
    
    peaktable[, start:ende] <- grpval;
  }
  
  #allocate variables for CAMERA output
  adduct   <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
  isotopes <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
  pcgroup  <- vector("character", nrow(mSet@peakAnnotation$AnnotateObject$groupInfo));
  
  #default polarity set to positive
  polarity <- "+";
  
  if(length(mSet@peakAnnotation$AnnotateObject$polarity) > 0){
    if(mSet@peakAnnotation$AnnotateObject$polarity == "negative"){
      polarity <- "-";
    }
  }
  
  #First isotope informationen and adduct informationen
  for(i in seq(along = isotopes)){
    #check if adduct annotation is present for peak i
    if(length(mSet@peakAnnotation$AnnotateObject$derivativeIons) > 0 && !(is.null(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]]))) {
      #Check if we have more than one annotation for peak i
      if(length(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]]) > 1) {
        #combine ion species name and rounded mass hypophysis
        names <- paste(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$name, 
                       signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
        for(ii in 2:length(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]])) {
          names <- paste(names, mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[ii]]$name, 
                         signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[ii]]$mass, 6));
        }
        #save name in vector adduct
        adduct[i] <- names;
      } else {
        #Only one annotation
        adduct[i] <- paste(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$name, 
                           signif(mSet@peakAnnotation$AnnotateObject$derivativeIons[[i]][[1]]$mass, 6));
      }
    } else {
      #no annotation empty name
      adduct[i] <- ""; 
    }
    
    #Check if we have isotope informationen about peak i
    if(length(mSet@peakAnnotation$AnnotateObject$isotopes) > 0&& !is.null(mSet@peakAnnotation$AnnotateObject$isotopes[[i]])) {
      num.iso <- mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$iso;
      #Which isotope peak is peak i?
      if(num.iso == 0){
        str.iso <- "[M]";
      } else { 
        str.iso <- paste("[M+", num.iso, "]", sep="")
      }
      #Multiple charged?
      if(mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$charge > 1){
        isotopes[i] <- paste("[", mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$y, "]", 
                             str.iso, 
                             mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$charge, polarity, sep="");
      }else{
        isotopes[i] <- paste("[", mSet@peakAnnotation$AnnotateObject$isotopes[[i]]$y, "]", 
                             str.iso, polarity, sep="");
      }
    } else { 
      #No isotope informationen available
      isotopes[i] <- ""; 
    }
  }
  
  #Have we more than one pseudospectrum?
  if(length(mSet@peakAnnotation$AnnotateObject$pspectra) < 1){
    pcgroup <- 0;
  } else {
    for(i in seq(along = mSet@peakAnnotation$AnnotateObject$pspectra)){
      index <- mSet@peakAnnotation$AnnotateObject$pspectra[[i]];
      pcgroup[index] <- i;
    }
  }
  
  rownames(peaktable)<-NULL;#Bugfix for: In data.row.names(row.names, rowsi, i) :  some row.names duplicated:
  return(invisible(data.frame(peaktable, isotopes, adduct, pcgroup, stringsAsFactors=FALSE, row.names=NULL)));
}
sampclass <- function(object) {
  if (ncol(object@phenoData) >0) {
    if(any(colnames(object@phenoData)=="class")){
      sclass <- object$class
      ## in any rate: transform class to a character vector
      ## and generate a new factor on that with the levels
      ## being in the order of the first occurrence of the
      ## elements (i.e. no alphanumeric ordering).
      sclass <- as.character(sclass)
      sclass <- factor(sclass, levels=unique(sclass))
      return(sclass)
    }
    interaction(object@phenoData, drop=TRUE)
  } else {
    factor()
  }
}
calcRules <- function (maxcharge=3, mol=3, nion=2, nnloss=1,
                       nnadd=1, nh=2, polarity=NULL, lib.loc = .libPaths(), multFragments=FALSE){
  
  r <- new("mSetRule")
  
  r <- setDefaultLists(r, lib.loc=lib.loc)
  r <- readLists(r)  
  r <- setParams(r, maxcharge=maxcharge, mol=mol, nion=nion,
                 nnloss=nnloss, nnadd=nnadd, nh=nh, polarity=polarity, lib.loc = lib.loc)  
  if(multFragments){
    r <- generateRules2(r)
  }else{
    r <- generateRules(r)
  }
  
  return(r@rules)
}

generateRules <- function (object) {
  
  maxcharge=object@maxcharge
  mol=object@mol
  nion=object@nion
  nnloss=object@nnloss
  nnadd=object@nnadd 
  nh=object@nh
  polarity=object@polarity
  
  ionlist=object@ionlist
  neutralloss=object@neutralloss
  neutraladdition=object@neutraladdition              
  
  rules=object@rules
  
  name<-c();
  nmol<-c();
  charge<-c();
  massdiff<-c();
  oidscore<-c();
  quasi<-c();
  ips<-c();
  
  ##Erzeuge Regeln
  tmpname   <- c();
  tmpnmol   <- c();
  tmpcharge <- 0;
  tmpmass   <- 0;
  tmpips    <- 0;
  
  ## Molekülionen
  if(polarity=="positive"){
    ## Wasserstoff, hard codiert
    for(k in seq_len(mol)){
      if(k == 1){
        str    <- "";
        tmpips <- 1.0;
      }else{
        str    <-  k;
        tmpips <- 0.5;
      };
      name     <- append(name, paste("[", str, "M+H]+", sep=""));
      charge   <- append(charge, 1);
      massdiff <- append(massdiff, 1.007276);
      nmol     <- append(nmol, k);
      if(k == 1) {
        quasi  <- append(quasi, 1);
      } else { 
        quasi  <- append(quasi, 0);
      };
      oidscore <- append(oidscore, 1);
      ips      <- append(ips, tmpips)
      name     <- append(name,paste("[",str,"M+2H]2+",sep=""));
      charge<-append(charge,2);
      massdiff<-append(massdiff,2.014552);
      nmol<-append(nmol,k);quasi<-append(quasi,0);
      oidscore<-append(oidscore,2);
      ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M+3H]3+",sep=""));
      charge<-append(charge,3);
      massdiff<-append(massdiff,3.021828);
      nmol<-append(nmol,k);
      quasi<-append(quasi,0);
      oidscore<-append(oidscore,3);
      ips<-append(ips,tmpips)
      oid<-3;
      for(i in seq_len(nrow(ionlist))){
        if(ionlist[i,2]<=0) {next;}
        if(ionlist[i,2]==1){
          name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]2+",sep=""));
        }else{
          name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]",ionlist[i,2]+1,"+",sep=""));
        }
        charge <- append(charge,ionlist[i,2]+1);
        massdiff <- append(massdiff,ionlist[i,3]+1.007276);
        nmol <- append(nmol,k);
        quasi <- append(quasi,0);
        oidscore <- append(oidscore,oid+i);
        if(tmpips>0.75){
          ips<-append(ips,0.5)
        }else{
          ips<-append(ips,tmpips);
        }#Austausch
      }
      oid<-oid+nrow(ionlist);
      coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
      if(length(list<-which(ionlist[,2]<=0))>0){
        coeff[,list]<-0;
      }
      coeff<-unique(coeff);
      coeff<-cbind(coeff,rep(0,nrow(coeff)));
      coeff<-coeff[-1,]
      tmp<-NULL;
      for(i in seq_len(nrow(ionlist))){
        if(ionlist[i,2]<=0)next;
        ## Austausch erstmal nur einen pro Ion
        tmp<-rbind(tmp,t(apply(coeff,1,function(x) {x[i]<-x[i]+1;x[nrow(ionlist)+1]<-1;x})));
      }
      coeff<-unique(rbind(coeff,tmp));
      i <- 1
      while (i <= nrow(coeff)){
        tmpname<-paste("[",str,"M",sep="");
        tmpcharge<-0;tmpmass<-0;
        for(ii in seq_len(ncol(coeff)-1)){
          if(coeff[i,ii]>0){
            if(coeff[i,ii]>1){
              tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
            }else{
              tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
            }
            tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
            tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
          }
        }
        if(coeff[i,ncol(coeff)]>0){
          ## Austausch hat stattgefunden, einfach bsp 1
          tmpname<-paste(tmpname,"-H",sep="");
          tmpcharge<-tmpcharge-1;
          tmpmass<-tmpmass-1.007276;
          tmpips<-0.25;
        }
        if(tmpcharge>1){
          tmpname<-paste(tmpname,"]",tmpcharge,"+",sep="")
        }else{
          tmpname<-paste(tmpname,"]+",sep="")
        }
        name<-append(name,tmpname)
        charge<-append(charge,tmpcharge)
        massdiff<-append(massdiff,tmpmass)
        nmol <- append(nmol,k);
        oidscore<-append(oidscore,oid+i)
        if(sum(coeff[i,])==1&& k==1){
          quasi <- append(quasi,1);
          ips <-append(ips,tmpips);
        }else{
          quasi <- append(quasi,0);
          if(tmpips>0.75){
            ips <- append(ips,0.75);
          }else{
            ips <- append(ips,tmpips);
          }
        }
        i <- i+1
      }
    }
    oid<-max(oidscore);
    
    ## Create Neutral Addition
    index<-which(quasi==1)
    
    if (nrow(neutraladdition)>0) {
      for(i in seq_len(nrow(neutraladdition))) {
        if(length(index2<-which(ionlist[,2]>0))>0){
          for(ii in seq_along(index2)){
            if(ionlist[index2[ii],2] > 1){
              name    <-  append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"+",sep=""));
            }else{
              name    <-  append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]+",sep=""));
            }
            charge <- append(charge,ionlist[index2[ii],2]);
            massdiff <- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
            nmol <- append(nmol,1);
            quasi <- append(quasi,0);
            oidscore    <-  append(oidscore,oid+1);oid<-oid+1;
            ips<-append(ips,0.5);
          }
        }
        if(neutraladdition[i,1]=="NaCOOH"){next;}
        name<-append(name,paste("[M+H+",neutraladdition[i,1],"]+",sep=""));
        charge<-append(charge,+1);
        massdiff<- append(massdiff,neutraladdition[i,2]+1.007276);
        nmol<-append(nmol,1);
        quasi<-append(quasi,0)
        oidscore<-append(oidscore,oid+1);oid<-oid+1;
        ips<-append(ips,0.5);
      }
      oid<-max(oidscore);
    }
    
    ##Erzeuge Neutral loss
    index<-which(quasi==1)
    for(i in seq_len(nrow(neutralloss))){
      for(ii in seq_len(maxcharge)){
        if(ii > 1){
          name<-append(name,paste("[M+",ii,"H-",neutralloss[i,1],"]",ii,"+",sep=""));
        }else {name<-append(name,paste("[M+H-",neutralloss[i,1],"]+",sep=""));}
        charge<-append(charge,ii);
        massdiff<-  append(massdiff,-neutralloss[i,2]+1.007276*ii);
        nmol<-append(nmol,1);
        quasi<-append(quasi,0)
        oidscore<-append(oidscore,oid+1);oid<-oid+1;
        ips<-append(ips,0.25);
      }
    }
    ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
    if(length(index<-which(ruleset[,"charge"]>maxcharge))>0){
      ruleset<- ruleset[-index,];
    }
  }else if(polarity=="negative"){
    ## Wasserstoff, hard codiert
    for(k in seq_len(mol)){
      if(k==1){str<-"";tmpips<-1.0;}else{str<-k;tmpips<-0.5};
      name<-append(name,paste("[",str,"M-H]-",sep=""));
      charge<-append(charge,-1);massdiff<-append(massdiff,-1.007276);nmol<-append(nmol,k);if(k==1){quasi<-append(quasi,1);}else{quasi<-append(quasi,0);};oidscore<-append(oidscore,1);ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M-2H]2-",sep=""));charge<-append(charge,-2);massdiff<-append(massdiff,-2.014552);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,2);ips<-append(ips,tmpips)
      name<-append(name,paste("[",str,"M-3H]3-",sep=""));charge<-append(charge,-3);massdiff<-append(massdiff,-3.021828);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,3);ips<-append(ips,tmpips)
      oid<-3;
      for(i in seq_len(nrow(ionlist))){
        if(ionlist[i,2]>=0){
          if(ionlist[i,2] == 1){
            name<-append(name,paste("[",str,"M-2H+",ionlist[i,1],"]-",sep=""));
            charge <- append(charge,ionlist[i,2]-2);
            massdiff<- append(massdiff,ionlist[i,3]-(2*1.007276));
            nmol <- append(nmol,k);
            quasi <- append(quasi,0);
            oidscore<-append(oidscore,oid+i);
            ips<-append(ips,0.25);
            next;
          } else {
            if(ionlist[i,2] > maxcharge) {next;}
            localCharge <- ionlist[i,2]+1
            name<-append(name,paste("[",str,"M-",localCharge,"H+",ionlist[i,1],"]-",sep=""));
            charge <- append(charge,ionlist[i,2]-localCharge);
            massdiff<- append(massdiff,ionlist[i,3]-(localCharge*1.007276));
            nmol <- append(nmol,k);
            quasi <- append(quasi,0);
            oidscore<-append(oidscore,oid+i);
            ips<-append(ips,0.25);
            next;
          }
        }
        if(ionlist[i,2]== -1){
          name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]2-",sep=""));
        }else{
          name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]",ionlist[i,2]+1,"-",sep=""));
        }
        charge <- append(charge,ionlist[i,2]-1);
        massdiff<- append(massdiff,ionlist[i,3]-1.007276);
        nmol <- append(nmol,k);
        quasi <- append(quasi,0);
        oidscore<-append(oidscore,oid+i);
        ips<-append(ips,tmpips);
        #Austausch
      }
      oid<-oid+nrow(ionlist);
      coeff<-expand.grid(rep(list(0:nion),nrow(ionlist)))
      if(length(list<-which(ionlist[,2]>=0))>0){
        coeff[,list]<-0;
      }
      coeff<-unique(coeff);
      coeff<-cbind(coeff,rep(0,nrow(coeff)));
      coeff<-coeff[-1,] ## remove first row
      i <- 1
      while (i <= nrow(coeff)){
        tmpname<-paste("[",str,"M",sep="");
        tmpcharge<-0;tmpmass<-0;
        for(ii in seq_len(ncol(coeff)-1)){
          if(coeff[i,ii]>0){
            if(coeff[i,ii]>1){
              tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep="");
            }else{
              tmpname<-paste(tmpname,"+",ionlist[ii,1],sep="");
            }
            tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2];
            tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3]
          }
        }
        if(coeff[i,ncol(coeff)]>0){
          #Austausch hat stattgefunden, einfach bsp 1
          tmpname<-paste(tmpname,"-H",sep="");
          tmpcharge<-tmpcharge-1;
          tmpmass<-tmpmass-1.007276;
          tmpips<-0.5;
        }
        if(tmpcharge< -1){
          tmpname<-paste(tmpname,"]",abs(tmpcharge),"-",sep="")
        }else{
          tmpname<-paste(tmpname,"]-",sep="")
        }
        name<-append(name,tmpname)
        charge<-append(charge,tmpcharge)
        massdiff<-append(massdiff,tmpmass)
        nmol <-append(nmol,k);
        oidscore<-append(oidscore,oid+i)
        if(sum(coeff[i,])==1&& k==1){
          quasi   <-append(quasi,1);
        }else{
          quasi <-append(quasi,0);
        }
        ips <-append(ips,tmpips);
        i <- i+1
      }
    }
    oid<-max(oidscore);
    
    ##Erzeuge Neutral Addition
    index<-which(quasi==1)
    for(i in seq_len(nrow(neutraladdition))){
      if(length(index2<-which(ionlist[,2]<0))>0){
        for(ii in seq_along(index2)){
          if(ionlist[index2[ii],2]< -1){
            name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"-",sep=""));
          }else{
            name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]-",sep=""));
          }
          charge <- append(charge,ionlist[index2[ii],2]);
          massdiff<- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]);
          nmol <- append(nmol,1);
          quasi <- append(quasi,0);
          oidscore<-append(oidscore,oid+1);oid<-oid+1;
          ips<-append(ips,0.5);
        }
      }
      name<-append(name,paste("[M-H+",neutraladdition[i,1],"]-",sep=""));
      charge<-append(charge,-1);
      massdiff<- append(massdiff,neutraladdition[i,2]-1.007276);
      nmol<-append(nmol,1);
      quasi<-append(quasi,0)
      oidscore<-append(oidscore,oid+1);oid<-oid+1;
      ips<-append(ips,0.5);
    }
    oid<-max(oidscore);
    
    ##Erzeuge Neutral loss
    index<-which(quasi==1)
    for(i in seq_len(nrow(neutralloss))){
      name<-append(name,paste("[M-H-",neutralloss[i,1],"]-",sep=""));
      charge<-append(charge,-1);
      massdiff<-  append(massdiff,-neutralloss[i,2]-1.007276);
      nmol<-append(nmol,1);
      quasi<-append(quasi,0)
      oidscore<-append(oidscore,oid+1);oid<-oid+1;
      ips<-append(ips,0.25);
    }
    ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips)
    if(length(index<-which(ruleset[,"charge"]< -maxcharge))>0){
      ruleset<- ruleset[-index,];
    }
  }else stop("Unknown issue appers: CODE: OPX0001")
  
  ## Update object rules and return ruleset
  object@rules=ruleset
  object;
}
generateRules2 <- function (object) {
  
  maxcharge=object@maxcharge
  mol=object@mol
  nion=object@nion
  nnloss=object@nnloss
  nnadd=object@nnadd 
  nh=object@nh
  polarity=object@polarity
  
  ionlist=object@ionlist
  neutralloss=object@neutralloss
  neutraladdition=object@neutraladdition              
  
  rules=object@rules
  
  ##Create Rules
  ruleset     <- matrix(nrow=0, ncol=8);
  colnames(ruleset) <- c("name", "nmol","charge", "massdiff", "typ","mandatory",
                         "score", "parent")
  
  tmpname   <- c();
  tmpcharge <- 0;
  tmpmass   <- 0;
  tmpionparent <- NA;
  massH <- 1.007276
  
  ##Positive Rule set
  if(polarity == "positive"){
    charge <- "+"
    chargeValue <- 1
  }else if(polarity == "negative"){
    charge <- "-"
    chargeValue <- -1
  }else{
    stop("Unknown polarity mode in rule set generation! Debug!\n")
  }
  
  #Hydrogen part is hard coded
  for(k in seq_len(mol)){
    if(k == 1){
      #For M+H
      str    <- "";
      tmpips <- 1.5;
      quasi  <- 1 
    }else{
      #For xM+H
      str    <-  k;
      tmpips <- 0;
      quasi  <- 0 
    }
    
    for(xh in seq.int(nh)){
      if(xh == 1){
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, "H]", charge, sep=""), k, chargeValue, 
                                        massH*chargeValue, "A", 
                                        quasi, tmpips+0.5, tmpionparent));
      } else {
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, xh, "H]", xh, charge, sep=""), 
                                        k,xh*chargeValue, massH*xh*chargeValue, "A", 0, 0.5, tmpionparent+1));
      }
    }
    
    for(i in seq_len(nrow(ionlist))){
      #Add change of kat- respectively anion against one hydrogen
      if(ionlist[i, 2] > 0 & charge == "-"){
        if(ionlist[i, 2] == 1){
          sumcharge <- "";
          hdiff <- 1;
        }else{
          sumcharge <- ionlist[i, 2];
          hdiff <- ionlist[i,2]-1;
        }
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "-", sumcharge,"H]", "",
                                              charge, sep=""), k, ionlist[i, 2]*chargeValue, 
                                        ionlist[i, 3]-massH*(1+hdiff),
                                        "A", 0, 0.25, tmpionparent));  
      }
      
      #xM+H + additional Kation like Na or K      
      if(ionlist[i,2] <= 0 & chargeValue > 0) {
        #Ions with negative charge, when polarity is positive
        next;
      } else if(ionlist[i, 2] >= 0 & chargeValue < 0){
        #Ions with positive charge, when polarity is negative
        next;
      }
      
      #Add Rule to Ruleset
      if(abs(ionlist[i, 2]) == 1){
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "]2", 
                                              charge, sep=""), k, ionlist[i, 2]+1*chargeValue, 
                                        ionlist[i, 3]+massH*chargeValue, "A", 0, 0.25, tmpionparent));
      }else{
        ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i, 1], "]", 
                                              ionlist[i, 2]+(1*chargeValue),
                                              charge, sep=""), k, ionlist[i, 2]+1*chargeValue, 
                                        ionlist[i, 3]+massH*chargeValue, "A" ,0, 0.25, tmpionparent));
      }
      
    }#End for loop nrow(ionlist)
    
    ##Coeff - coefficient Matrix, for generating rules with
    #combination of kat- or anionsexchange ions like [M-2H+Na] (M-H and change of H against Na)
    coeff <- expand.grid(rep(list(0:nion), nrow(ionlist)))
    if(chargeValue > 0){
      index <- which(ionlist[, 2] <= 0);
    }else{
      index <- which(ionlist[, 2] >= 0);
    }
    
    if(length(index) > 0){
      coeff[, index] <- 0;
    }
    
    tmp <- NULL;
    for(i in seq_len(nrow(ionlist))){
      if((chargeValue > 0 & ionlist[i, 2] <= 0) | (chargeValue < 0 & ionlist[i,2] >=0)){
        next;
      }
      #Austausch erstmal nur einen pro Ion
      tmp <- rbind(tmp, t(apply(coeff, 1, function(x) {
        x[i] <- x[i]+1;
        x[nrow(ionlist)+1] <- -1;
        x }
      )));
    }
    coeff <- cbind(coeff, rep(0, nrow(coeff)));
    colnames(coeff)[4] <- "Var4"
    colnames(tmp)[4] <- "Var4"
    coeff <- unique(rbind(coeff, tmp));
    
    for(i in seq_len(nrow(coeff))){
      if(sum(coeff[i, seq_len(nrow(ionlist))]) > 2 | sum(coeff[i, seq_len(nrow(ionlist))]) < 1){
        next;
      }
      
      tmpname   <- paste("[",str,"M",sep="");
      tmpcharge <- 0;
      tmpmass   <- 0;
      
      for(ii in seq_len(ncol(coeff))){
        if(coeff[i,ii] > 0){
          if(coeff[i,ii] > 1){
            tmpname <- paste(tmpname, "+", coeff[i, ii], ionlist[ii, 1], sep="");
          }else{
            tmpname <- paste(tmpname, "+", ionlist[ii, 1], sep="");
          }
          tmpcharge <- tmpcharge + coeff[i, ii] * ionlist[ii, 2];
          tmpmass   <- tmpmass + coeff[i, ii] * ionlist[ii, 3];
        } else if (coeff[i,ii] < 0){
          tmpname <- paste(tmpname, "-H", sep="");
          tmpcharge <- tmpcharge - 1;
          tmpmass   <- tmpmass  - massH;
        }
      }
      
      if(abs(tmpcharge) > 1){
        tmpname <- paste(tmpname, "]", tmpcharge, charge, sep="")
      }else{
        tmpname <- paste(tmpname, "]", charge, sep="")
      }
      
      if(tmpcharge > maxcharge | tmpcharge == 0){
        next;
      }
      
      if(sum(coeff[i, ]) == 1 && k == 1 && coeff[i, 4] >=0){
        ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 1, 0.75, tmpionparent));
      }else{
        ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 0, 0.25, tmpionparent));
      }
    }#end for loop nrow(coeff)
  }#end for loop k
  
  # Create neutral addition to M+H from list
  for(i in seq_len(nrow(neutraladdition))){
    #Add neutral ion to only M+H
    ruleset <- rbind(ruleset, cbind(paste("[M", charge, "H+", neutraladdition[i, 1], "]", charge, sep="") , 1, chargeValue, 
                                    neutraladdition[i, 2]+(massH*chargeValue), "A", 0, 0.25, 1));
  }
  
  ## Add neutral loss from list to ruleset
  for(i in seq_len(nrow(neutralloss))){
    ruleset <- rbind(ruleset, cbind(paste("-", neutralloss[i, 1], sep=""), 1, 0, 
                                    -neutralloss[i, 2], "F", 0, 0.25, 1));
    #Eliminate rules with charge > maxcharge
    if(length(index <- which(ruleset[, "charge"] > maxcharge)) > 0){
      ruleset <- ruleset[-index, ];
    }
  }
  ruleset <- as.data.frame(ruleset, stringsAsFactors=FALSE)
  class(ruleset$nmol) <- "numeric"
  class(ruleset$charge) <- "numeric"
  class(ruleset$massdiff) <- "numeric"
  class(ruleset$mandatory) <- "numeric"
  class(ruleset$score) <- "numeric"
  class(ruleset$parent) <- "numeric"
  object@rules=ruleset
  object;
  
  return(object);
}
setDefaultLists <- function(object, lib.loc=.libPaths()) {

    ##Read Tabellen
    object@ionlistfile <- system.file('lists/ions.csv', package = "OptiLCMS",
                                      lib.loc=lib.loc)[1]
    if (!file.exists(object@ionlistfile)) stop('ions.csv not found.')
    
    object@neutrallossfile <- system.file('lists/neutralloss.csv', 
                                          package = "OptiLCMS", lib.loc=lib.loc)[1]
    if (!file.exists(object@neutrallossfile)) stop('neutralloss.csv not found.')
    
    object@neutraladditionfile <- system.file('lists/neutraladdition.csv', 
                                              package = "OptiLCMS", lib.loc=lib.loc)[1]
    if (!file.exists(object@neutraladditionfile)) stop('neutraladdition.csv not found.')
    object

}
readLists <- function(object) {
  
  object@ionlist <- read.table(object@ionlistfile, header=TRUE, dec=".", sep=",",
                               as.is=TRUE, stringsAsFactors = FALSE);
  
  object@neutralloss <- read.table(object@neutrallossfile, header=TRUE, dec=".", sep=",",
                                   as.is=TRUE, stringsAsFactors = FALSE);
  
  object@neutraladdition <- read.table(object@neutraladditionfile,
                                       header=TRUE, dec=".", sep=",",
                                       as.is=TRUE, stringsAsFactors = FALSE);            
  object
}
setParams <- function (object, maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2, 
                       polarity=NULL, lib.loc=NULL) {
  object@maxcharge=maxcharge 
  object@mol=mol
  object@nion=nion
  object@nnloss=nnloss 
  object@nnadd=nnadd 
  object@nh=nh
  object@polarity=polarity
  object@lib.loc=lib.loc
  object
}
annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx) {
  #m/z vector for group i with peakindex ipeak
  mz     <- imz[ipeak];
  naIdx <- which(!is.na(mz))
  
  #Spectrum have only annotated isotope peaks, without monoisotopic peak
  #Give error or warning?
  if(length(na.omit(mz[naIdx])) < 1){
    return(NULL);
  }
  
  ML <- massDiffMatrix(mz[naIdx], rules[rules.idx,]);
  if(nrow(ML) > 1500) {
    #Avoid too time-consuming running
    return(NULL)
  }
  hypothese <- createHypothese(ML, rules[rules.idx, ], devppm, mzabs, naIdx);
  
  #create hypotheses
  if(is.null(nrow(hypothese)) || nrow(hypothese) < 2 ){
    return(NULL);
  }
  
  #remove hypotheses, which violates via isotope annotation discovered ion charge 
  if(length(isotopes) > 0){
    hypothese <- checkIsotopes(hypothese, isotopes, ipeak);
  }
  
  if(nrow(hypothese) < 2){
    return(NULL);
  }
  
  #Test if hypothese grps include mandatory ions
  #Filter Rules #2
  if(length(quasimolion) > 0){
    hypothese <- checkQuasimolion(hypothese, quasimolion);
  }
  
  if(nrow(hypothese) < 2){
    return(NULL);
  };
  
  #Entferne Hypothesen, welche gegen OID-Score&Kausalität verstossen!
  hypothese <- checkOidCausality(hypothese, rules[rules.idx, ]);
  if(nrow(hypothese) < 2){
    return(NULL);
  };
  
  #Prüfe IPS-Score
  hypothese <- checkIps(hypothese)
  if(nrow(hypothese) < 2){
    return(NULL)
  }
  
  #We have hypotheses and want to add neutral losses
  if("typ" %in% colnames(rules)){
    hypothese <- addFragments(hypothese, rules, mz)
    
    hypothese <- resolveFragmentConnections(hypothese)
  }
  return(hypothese);
}
massDiffMatrix <- function(m, rules){
  #m - m/z vector
  #rules - annotation rules
  nRules <- nrow(rules);
  DM   <- matrix(NA, length(m), nRules)
  
  for (i in seq_along(m)){
    for (j in seq_len(nRules)){
      DM[i, j] <- (abs(rules[j, "charge"] * m[i]) - rules[j, "massdiff"]) / rules[j, "nmol"]    # ((z*m) - add) /n
    }
  }
  return(DM)
}

#' @importFrom stats cutree hclust
createHypothese <- function(ML, rules, devppm, mzabs, naIdx){
  ML.nrow <- nrow(ML);
  ML.vec <- as.vector(ML);
  max.value <- max(round(ML, 0));
  hashmap <- vector(mode="list", length=max.value);
  for(i in seq_along(ML)){
    val <- trunc(ML[i],0);
    if(val>1){
      hashmap[[val]] <- c(hashmap[[val]],i);
    }
  }
  if("ips" %in% colnames(rules)){
    score <- "ips"
  }else{
    score <- "score"
  }
  hypothese <- matrix(NA,ncol=8,nrow=0);
  colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check");
  massgrp <- 1;
  
  for(i in seq(along=hashmap)){
    if(is.null(hashmap[[i]])){
      next;
    }
    
    candidates <- ML.vec[hashmap[[i]]];
    candidates.index <- hashmap[[i]];
    if(i != 1 && !is.null(hashmap[[i-1]]) && min(candidates) < i+(2*devppm*i+mzabs)){
      index <- which(ML.vec[hashmap[[i-1]]]> i-(2*devppm*i+mzabs))
      if(length(index)>0) {
        candidates <- c(candidates, ML.vec[hashmap[[i-1]]][index]);
        candidates.index <- c(candidates.index,hashmap[[i-1]][index]);
      }    
    }
    
    if(length(candidates) < 2){
      next;
    }
    
    tol <- max(2*devppm*mean(candidates, na.rm=TRUE))+ mzabs;
    result <- cutree(hclust(dist(candidates)), h=tol);
    index <- which(table(result) >= 2);
    if(length(index) == 0){
      next;
    }
    m <- lapply(index, function(x) which(result == x));
    for(ii in seq_along(m)){
      ini.adducts <- candidates.index[m[[ii]]];
      for(iii in seq_along(ini.adducts)){
        adduct <- ini.adducts[iii] %/% ML.nrow +1;
        mass   <- ini.adducts[iii] %% ML.nrow;
        if(mass == 0){
          mass <- ML.nrow;
          adduct <- adduct -1;
        }
        hypothese <- rbind(hypothese, c(naIdx[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], mean(candidates[m[[ii]]]),  rules[adduct,score],massgrp ,1));
      }
      if(length(unique(hypothese[which(hypothese[, "massgrp"] == massgrp), "massID"])) == 1){
        ##only one mass annotated
        hypothese <- hypothese[-(which(hypothese[,"massgrp"]==massgrp)),,drop=FALSE]
      }else{
        massgrp <- massgrp +1;
      }
    }
  }
  return(hypothese);
}
checkIsotopes <- function(hypothese, isotopes, ipeak){
  for(hyp in seq_len(nrow(hypothese))){
    peakid <- ipeak[hypothese[hyp, 1]];
    if(!is.null(isotopes[[peakid]])){
      #Isotope da
      explainable <- FALSE;
      if(isotopes[[peakid]]$charge == abs(hypothese[hyp, "charge"])){
        explainable <- TRUE;
      }
      if(!explainable){
        #delete Rule
        hypothese[hyp,"check"]=0;
      }
    }
  }
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  
  return(hypothese)
}
checkHypothese <- function(hypothese){
  if(is.null(nrow(hypothese))){
    hypothese <- matrix(hypothese, byrow=FALSE, ncol=8)
  } 
  colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check")
  for(i in unique(hypothese[,"massgrp"])){
    if(length(unique(hypothese[which(hypothese[, "massgrp"] == i), "massID"])) == 1){
      ##only one mass annotated
      hypothese <- hypothese[-(which(hypothese[,"massgrp"]==i)), , drop=FALSE]
    } 
  }
  return(hypothese)
}

checkIps <- function(hypothese){
  for(hyp in seq_len(nrow(hypothese))){
    if(length(which(hypothese[, "massgrp"] == hypothese[hyp, "massgrp"])) < 2){
      hypothese[hyp, "check"] = 0;
    }
  }
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ];
  if(is.null(nrow(hypothese))) {
    hypothese <- matrix(hypothese, byrow=FALSE, ncol=9)
  }
  if(nrow(hypothese) < 1){
    colnames(hypothese)<-c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips","massgrp", "check")
    return(hypothese)
  }
  for(hyp in seq_len(nrow(hypothese))){
    if(length(id <- which(hypothese[, "massID"] == hypothese[hyp, "massID"] & hypothese[, "check"] != 0)) > 1){
      masses <- hypothese[id, "mass"]
      nmasses <- sapply(masses, function(x) { 
        sum(hypothese[which(hypothese[, "mass"] == x), "score"]) 
      })
      masses <- masses[-which(nmasses == max(nmasses))];
      if(length(masses) > 0){
        hypothese[unlist(sapply(masses, function(x) {which(hypothese[, "mass"]==x)})), "check"]=0;
      }
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  return(hypothese)
}
checkOidCausality <- function(hypothese,rules){
  #check every hypothese grp
  for(hyp in unique(hypothese[,"massgrp"])){
    hyp.nmol <- which(hypothese[, "massgrp"] == hyp & hypothese[, "nmol"] > 1)
    
    for(hyp.nmol.idx in hyp.nmol){
      if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp.nmol.idx, "mass"] & 
                              abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){
        if(hyp.nmol.idx %in% indi){
          #check if [M+H] [2M+2H]... annotate the same molecule
          massdiff <- rules[hypothese[indi, "ruleID"], "massdiff"] / 
            rules[hypothese[indi, "ruleID"], "charge"]
          if(length(indi_new <- which(duplicated(massdiff))) > 0){
            hypothese[hyp.nmol.idx, "check"] <- 0;
          }
        }
      }
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"] == TRUE), ,drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  return(hypothese)
}
checkQuasimolion <- function(hypothese, quasimolion){
  hypomass <- unique(hypothese[, "mass"])
  for(mass in seq_along(hypomass)){
    if(!any(quasimolion %in% hypothese[which(hypothese[, "mass"] == hypomass[mass]), "ruleID"])){
      hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
    }else if(is.null(nrow(hypothese[which(hypothese[, "mass"] == hypomass[mass]), ]))){
      hypothese[which(hypothese[, "mass"] == hypomass[mass]), "check"] = 0;
    }
  }
  
  hypothese <- hypothese[which(hypothese[, "check"]==TRUE), , drop=FALSE];
  #check if hypothese grps annotate at least two different peaks
  hypothese <- checkHypothese(hypothese)
  
  return(hypothese)
}

getderivativeIons <- function(annoID, annoGrp, rules, npeaks){
  #generate Vector length npeaks
  derivativeIons <- vector("list", npeaks);
  #intrinsic charge
  #TODO: Not working at the moment
  charge <- 0;
  
  #check if we have annotations
  if(nrow(annoID) < 1){
    return(derivativeIons);
  }
  
  for(i in seq_len(nrow(annoID))){
    
    peakid  <-  annoID[i, 1];
    grpid   <-  annoID[i, 2];
    ruleid  <-  annoID[i, 3];
    
    if(is.null(derivativeIons[[peakid]])){
      #Peak has no annotations so far
      if(charge == 0 | rules[ruleid, "charge"] == charge){
        derivativeIons[[peakid]][[1]] <- list( rule_id = ruleid, 
                                               charge = rules[ruleid, "charge"], 
                                               nmol = rules[ruleid, "nmol"], 
                                               name = paste(rules[ruleid, "name"]),
                                               mass = annoGrp[grpid, 2])
      }
    } else {
      #Peak has already an annotation
      if(charge == 0 | rules[ruleid, "charge"] == charge){
        derivativeIons[[peakid]][[(length(
          derivativeIons[[peakid]])+1)]] <- list( rule_id = ruleid, 
                                                  charge = rules[ruleid, "charge"],
                                                  nmol = rules[ruleid, "nmol"],
                                                  name=paste(rules[ruleid, "name"]),
                                                  mass=annoGrp[grpid, 2])
      }
    }
    
    charge <- 0;
  }
  return(derivativeIons);
}

profMat <- function(object,method = "bin",step = 0.1,baselevel = NULL,
                    basespace = NULL,mzrange. = NULL,fileIndex,...) {
  ## Subset the object by fileIndex.
  if (!missing(fileIndex)) {
    if (!is.numeric(fileIndex))
      stop("'fileIndex' has to be an integer.")
    if (!all(fileIndex %in% seq_along(fileNames(object))))
      stop("'fileIndex' has to be an integer between 1 and ",
           length(fileNames(object)), "!")
    object <- filterFile(object, fileIndex)
  }
  ## Split it by file and bplapply over it to generate the profile matrix.
  theF <- factor(seq_along(fileNames(object)))
  theDots <- list(...)
  if (any(names(theDots) == "returnBreaks"))
    returnBreaks <- theDots$returnBreaks
  else
    returnBreaks <- FALSE
  res <- bplapply(splitByFile(object, f = theF), function(z, bmethod, bstep,
                                                          bbaselevel,
                                                          bbasespace,
                                                          bmzrange.,
                                                          breturnBreaks) {

    if (is(z, "MSnExp") && !is(z, "OnDiskMSnExp")) {
      sps <- z@assayData
    } else if (is(z, "OnDiskMSnExp")) {
      sps <- spectra(z, BPPARAM = SerialParam())
    } else {
      stop("Wrong Data provided!")
    }

    mzs <- lapply(sps, mz)
    ## Fix for issue #301: got spectra with m/z being NA.
    if (any(is.na(unlist(mzs, use.names = FALSE)))) {
      sps <- lapply(sps, clean, all = TRUE)
      mzs <- lapply(sps, mz)
    }
    ## Fix for issue #312: remove empty spectra, that we are however adding
    ## later so that the ncol(profMat) == length(rtime(object))
    pk_count <- lengths(mzs)
    empty_spectra <- which(pk_count == 0)
    if (length(empty_spectra)) {
      mzs <- mzs[-empty_spectra]
      sps <- sps[-empty_spectra]
    }
    vps <- lengths(mzs, use.names = FALSE)
    res <- .createProfileMatrix(mz = unlist(mzs, use.names = FALSE),
                                int = unlist(lapply(sps, intensity),
                                             use.names = FALSE),
                                valsPerSpect = vps,
                                method = bmethod,
                                step = bstep,
                                baselevel = bbaselevel,
                                basespace = bbasespace,
                                mzrange. = bmzrange.,
                                returnBreaks = breturnBreaks)
    if (length(empty_spectra))
      if (returnBreaks)
        res$profMat <- .insertColumn(res$profMat, empty_spectra, 0)
    else
      res <- .insertColumn(res, empty_spectra, 0)
    res
  }, bmethod = method, bstep = step, bbaselevel = baselevel,
  bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks)
  res
}

.createProfileMatrix <- function(mz, int, valsPerSpect,
                                 method, step = 0.1, baselevel = NULL,
                                 basespace = NULL,
                                 mzrange. = NULL,
                                 returnBreaks = FALSE,
                                 baseValue = 0) {
  profMeths <- c("bin", "binlin", "binlinbase", "intlin")
  names(profMeths) <- c("none", "lin", "linbase", "intlin")
  method <- match.arg(method, profMeths)
  impute <- names(profMeths)[profMeths == method]
  brks <- NULL
  
  if (length(mzrange.) != 2) {
    mrange <- range(mz, na.rm = TRUE)
    mzrange. <- c(floor(mrange[1] / step) * step,
                  ceiling(mrange[2] / step) * step)
  }
  mass <- seq(mzrange.[1], mzrange.[2], by = step)
  mlength <- length(mass)
  
  ## Calculate the "real" bin size; old xcms code oddity that that's different
  ## from step.
  
  bin_size <- (mass[mlength] - mass[1]) / (mlength - 1)
  
  ## Define the breaks.
  toIdx <- cumsum(valsPerSpect)
  fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
  shiftBy <- TRUE
  binFromX <- min(mass)
  binToX <- max(mass)
  brks <- breaks_on_nBinsR(fromX = binFromX, toX = binToX,
                          nBins = mlength, shiftByHalfBinSize = TRUE)
  
  ## for profIntLinM we have to use the old code.
  if (impute == "intlin") {
    profFun <- "profIntLinM"
    profp <- list()
    scanindex <- valueCount2ScanIndex(valsPerSpect)
    buf <- do.call(profFun, args = list(mz, int,
                                        scanindex, mlength,
                                        mass[1], mass[mlength],
                                        TRUE))
  } else {
    ## Binning the data.
    binRes <- binYonXR(mz, int,
                      breaks = brks,
                      fromIdx = fromIdx,
                      toIdx = toIdx,
                      baseValue = ifelse(impute == "none", yes = baseValue,
                                         no = NA),
                      sortedX = TRUE,
                      returnIndex = FALSE,
                      returnX = FALSE
    )
    if (length(toIdx) == 1)
      binRes <- list(binRes)
    ## Missing value imputation.
    if (impute == "linbase") {
      ## need arguments distance and baseValue.
      if (length(basespace) > 0) {
        if (!is.numeric(basespace))
          stop("'basespace' has to be numeric!")
        distance <- floor(basespace[1] / bin_size)
      } else {
        distance <- floor(0.075 / bin_size)
      }
      if (length(baselevel) > 0) {
        if (!is.numeric(baselevel))
          stop("'baselevel' has to be numeric!")
        baseValue <- baselevel
      } else {
        baseValue <- min(int, na.rm = TRUE) / 2
      }
    } else {
      distance <- 0
      baseValue <- 0
    }
    if (method == "none") {
      ## binVals <- lapply(binRes, function(z) z$y)
      binVals <- binRes
    } else {
      binVals <- lapply(binRes, function(z) {
        imputeLinInterpol(z$y, method = impute, distance = distance,
                          noInterpolAtEnds = TRUE,
                          baseValue = baseValue)
      })
    }
    buf <- base::do.call(cbind, binVals)
  }
  if (returnBreaks)
    buf <- list(profMat = buf, breaks = brks)
  buf
}
.insertColumn <- function(x, pos = integer(), val = NULL) {
  if (length(pos)) {
    if (length(val) == 1)
      val <- rep(val, length(pos))
    if (length(val) != length(pos))
      stop("length of 'pos' and 'val' have to match")
  }
  for (i in seq_along(pos)) {
    if (pos[i] == 1) {
      x <- cbind(val[[i]], x)
    } else {
      if (pos[i] == ncol(x))
        x <- cbind(x, val[[i]])
      else
        x <- cbind(x[, seq_len(pos[i]-1)], val[[i]], x[, pos[i]:ncol(x)])
    }
  }
  x
}

.exportmSetRuleClass <- function(){
  return(new("mSetRule"))
}
valueCount2ScanIndex <- function(valCount){
  ## Convert into 0 based.
  valCount <- cumsum(valCount)
  return(as.integer(c(0, valCount[-length(valCount)])))
}

naOmit <- function(x) {
  return (x[!is.na(x)]);
}


bede <- function (x, y, index) 
{
  EDE <- c()
  BEDE <- c()
  a <- c(x[1])
  b <- c(x[length(x)])
  nped <- c(length(x))
  x2 <- x
  y2 <- y
  B = ede(x, y, index)
  EDE <- c(EDE, B[1, 3])
  BEDE <- c(BEDE, B[1, 3])
  iplast = B[1, 3]
  j <- 0
  while (!(is.nan(B[1, 3]))) {
    ifelse(B[1, 2] >= B[1, 1] + 3, {
      j <- j + 1
      x2 <- x2[B[1, 1]:B[1, 2]]
      y2 <- y2[B[1, 1]:B[1, 2]]
      B <- ede(x2, y2, index)
      ifelse(!(is.nan(B[1, 3])), {
        a = c(a, x2[B[1, 1]])
        b = c(b, x2[B[1, 2]])
        nped <- c(nped, length(x2))
        EDE <- c(EDE, B[1, 3])
        BEDE <- c(BEDE, B[1, 3])
        iplast = B[1, 3]
      }, {
        break
      })
    }, {
      break
    })
  }
  iters = as.data.frame(cbind(nped, a, b, BEDE))
  colnames(iters) = c("n", "a", "b", "EDE")
  rownames(iters) = 1:length(nped)
  out = list()
  out[["iplast"]] = iplast
  out[["iters"]] = iters
  return(out)
}

ede <- function (x, y, index) 
{
  n = length(x)
  if (index == 1) {
    y = -y
  }
  ifelse(n >= 4, {
    LF = y - lin2(x[1], y[1], x[n], y[n], x)
    jf1 = which.min(LF)
    xf1 = x[jf1]
    jf2 = which.max(LF)
    xf2 = x[jf2]
    ifelse(jf2 < jf1, {
      xfx <- NaN
    }, {
      xfx <- 0.5 * (xf1 + xf2)
    })
  }, {
    jf1 = NaN
    jf2 = NaN
    xfx = NaN
  })
  out = matrix(c(jf1, jf2, xfx), nrow = 1, ncol = 3, byrow = TRUE)
  rownames(out) = "EDE"
  colnames(out) = c("j1", "j2", "chi")
  return(out)
}

lin2 <- function (x1, y1, x2, y2, x) 
{
  y1 + (y2 - y1) * (x - x1)/(x2 - x1)
}

####### ------------ ======== Bottom of this kit ========= ------------ ######\

.getDataPath <- function() {
  
  if (file.exists("/home/zgy/scheduler/MetaboAnalyst.so")) {
    path <-
      "/home/glassfish/payara5/glassfish/domains/domain1/applications/MetaboAnalyst/resources/data/"
  } else if (dir.exists("/media/zzggyy/disk/")) {
    path <-
      "/media/zzggyy/disk/MetaboAnalyst/target/MetaboAnalyst-5.18/resources/data/"
  } else if (.on.public.web()) {
    path <- "../../data/"
  }
  
  return(path)
}

.replace.by.lod <- function(x){
  lod <- min(x[x>0], na.rm=TRUE)/5;
  x[x==0|is.na(x)] <- lod;
  return(x);
}
xia-lab/OptiLCMS documentation built on March 27, 2024, 11:11 a.m.