R/Spectra_Utils.R

Defines functions .replace.by.lod .getDataPath naOmit valueCount2ScanIndex .exportmSetRuleClass .insertColumn .createProfileMatrix profMat getderivativeIons checkQuasimolion checkOidCausality checkIps checkHypothese checkIsotopes createHypothese massDiffMatrix annotateGrp readLists setDefaultLists sampclass getPeaklist findAdducts calcPC.hcs calcCiS mpi.comm.size combineCalc fastMatchR getAllPeakEICs create.matrix addFragments resolveFragmentConnections findIsotopesPspec calcIsotopeMatrix is.wholenumber groupval getPeaks_selection .getDdaMS2Scans PeakPicking_core PeakPicking_prep filtfft .aggregateMethod2int .get_closest_index .groupval .feature_values .getChromPeakData .applyRtAdjustment .applyRtAdjToChromPeaks .peakIndex .getPeakGroupsRtMatrix .rawMat .narrow_rt_boundaries .match_trim_vectors .match_trim_vector_index na.flatfill fitGauss gauss cent gaussCoverage trimm joinOverlappingPeaks descendMinTol valueCount2ScanIndex MSW.getRidge MSW.getLocalMaximumCWT MSW.extendLength MSW.extendNBase getLocalNoiseEstimate OptiChromPeaks adjustRtimeSubset updateRawSpectraPath creatPeakTable mSet2xcmsSet PerformBlankSubstraction PerformPeakFiling mSet.obiwarp RT.Adjust_peakGroup adjustRtime_obiwarp adjustRtime_peakGroup PerformRTcorrection PerformPeakAlignment Densitygrouping_slave PerformPeakGrouping PeakPicking_MatchedFilter_slave PeakPicking_Massifquant_slave PeakPicking_centWave_slave PerformPeakPicking

Documented in PerformPeakAlignment PerformPeakFiling PerformPeakPicking updateRawSpectraPath

# Content of this script

# 1. raw data function - xcms 
# 2. Peaks Peaking Function - parameters optimization
# 3. MS/MS processing - (moved from preproc_utils)
# 4. Peak Annotation - CAMERA
# 5. MS File Reading - MSnbase

# --------------------1. Raw spectra processing_Section using the xcms way---------------------------------------------

#' @title PerformPeakPicking
#' @description This funciton is used to Perform Peak Picking on an object generated by ImportRawMSdata
#' @param mSet the raw data object read by ImportRawMSData function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam(). Optional.
#' @export
#' @return will return an mSet object with peaks picked
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' # mSet <- PerformPeakPicking(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakPicking<-function(mSet, BPPARAM = bpparam()){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  object <- mSet@rawOnDisk;
  param <- mSet@params;
  
  if(length(object) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  object_mslevel <- MSnbase::filterMsLevel(
    MSnbase::selectFeatureData(object,
                               fcol = c(.MSnExpReqFvarLabels,
                                        "centroided")), msLevel. = 1)
  

  # Splite the MS data 
  object_mslevel <- lapply(seq_along(fileNames(object_mslevel)),
                           FUN = filterFile,
                           object = object_mslevel)
  
  param$.optimize_switch <- .SwapEnv$.optimize_switch;
  
  # Peak picking runnning - centWave mode
  if (param$Peak_method == "centWave")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_centWave_slave, # slave Function of centWave
                        param = param, BPPARAM = BPPARAM)
  
  
  # Peak picking runnning - Massifquant mode
  if (param$Peak_method == "Massifquant")
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_Massifquant_slave, # slave Function of Massifquant
                        param = param, BPPARAM = BPPARAM)
  
  
  # Peak picking runnning - MatchedFilter mode
  if (param$Peak_method == "matchedFilter"){
    resList <- bplapply(object_mslevel,
                        FUN = PeakPicking_MatchedFilter_slave, # slave Function of MatchedFilter
                        param = param, BPPARAM = BPPARAM)
  }
  
  
  # if (param$Peak_method != "matchedFilter" && param$Peak_method != "centWave"){
  #   stop("Only 'centWave' and 'MatchedFilter' are suppoted now !")
  # }
  
  # Picking Cluster
  
  ##### PEAK SUMMARY------------
  
  pks <- vector("list", length(resList))
  for (i in seq_along(resList)) {
    n_pks <- nrow(resList[[i]])
    if (is.null(n_pks))
      n_pks <- 0
    if (n_pks == 0) {
      pks[[i]] <- NULL
      if(!.SwapEnv$.optimize_switch){
        warning("No peaks found in sample number ", i, ".")
      }
    } else {
      pks[[i]] <- cbind(resList[[i]], sample = rep.int(i, n_pks))
    }
    
  } 
  pks <- do.call(rbind, pks)
  
  # Make the chrompeaks embeded
  newFd <- list()
  #newFd@.xData <- as.environment(as.list(object@msFeatureData, all.names = TRUE))
  rownames(pks) <- sprintf(paste0("CP", "%0", ceiling(log10(nrow(pks) + 1L)), "d"),
                           seq(from = 1, length.out = nrow(pks)))
  
  newFd$chromPeaks <- pks
  newFd$chromPeakData <- S4Vectors::DataFrame(ms_level = rep(1L, nrow(pks)), is_filled = rep(FALSE, nrow(pks)),
                                              row.names = rownames(pks))
  
  ## mSet Generation
  mSet@peakpicking <- newFd;
  
  # Stop parallel
  register(bpstop());
  
  return(mSet)
}

#' @title PeakPicking_centWave_slave
#' @description PeakPicking_centWave_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_centWave_slave <- function(x, param){
  
  if(!exists(".optimize_switch")){
    if(!is.null(param$.optimize_switch)){
      .optimize_switch <- param$.optimize_switch;
    } else {
      .optimize_switch <- TRUE;
    }
   
  }
 
  # load necessary C code for data processing
  # for raw data processing
  if (is(x, "OnDiskMSnExp")){
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    filenamevalue <- basename(x@processingData@files)
  }
  
  # for parameters optimization
  if (is(x,"list")) {
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    filenamevalue <- NA;
  }
  
  mzs <- lapply(scan.set, MSnbase::mz)
  vals_per_spect <- lengths(mzs, FALSE)
  
  
  #if (any(vals_per_spect == 0))
  #  mzs <- mzs[-which(vals_per_spect == 0)];
  #  x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
  #  warning("Found empty spectra Scan. Filtered Automatically !")
  
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  valsPerSpect = vals_per_spect;
  scantime = rt;
  
  ######   centWaveCore Function
  
  valCount <- cumsum(valsPerSpect);
  scanindex <- as.integer(c(0, valCount[-length(valCount)])); ## Get index vector for C calls
  
  if (!is.double(mz))
    mz <- as.double(mz)
  if (!is.double(int))
    int <- as.double(int)
  ## Fix the mzCenterFun
  #mzCenterFun <- paste("mzCenter",
  #                     gsub(mzCenterFun, pattern = "mzCenter.",
  #                          replacement = "", fixed = TRUE), sep=".")
  
  basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
                 "into", "intb", "maxo", "sn")
  verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale",
                    "scpos", "scmin", "scmax", "lmin", "lmax")
  
  ## Peak width: seconds to scales
  peakwidth <- param$peakwidth
  scalerange <- round((peakwidth / mean(diff(scantime))) / 2)
  
  if (length(z <- which(scalerange == 0)))
    scalerange <- scalerange[-z]
  if (length(scalerange) < 1) {
    warning("No scales? Please check peak width!")
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    return(invisible(nopeaks))
  }
  
  if (length(scalerange) > 1){
    scales <- seq(from = scalerange[1], to = scalerange[2], by = 2)
  } else{
    scales <- scalerange
  }
  
  minPeakWidth <-  scales[1]
  noiserange <- c(minPeakWidth * 3, max(scales) * 3)
  maxGaussOverlap <- 0.5
  minPtsAboveBaseLine <- max(4, minPeakWidth - 2)
  minCentroids <- minPtsAboveBaseLine
  scRangeTol <-  maxDescOutlier <- floor(minPeakWidth / 2)
  scanrange <- c(1, length(scantime))

  ## If no ROIs are supplied then search for them.
  roiList <- list()
  if (length(roiList) == 0) {
    if (.on.public.web() & !.optimize_switch) {
      # Do nothing
    } else if(.optimize_switch) {
      # Do nothing
    } else {
      message("ROI searching under ", param$ppm, " ppm ... ", appendLF = FALSE)
    }
    
    withRestarts(
      tryCatch({
        tmp <- capture.output(roiList <-
                                findmzROIR (mz, int, scanindex, scanrange, scantime, param,
                                           minCentroids))
      },
      error = function(e) {
        if (grepl("m/z sort assumption violated !", e$message)) {
          invokeRestart("fixSort")
        } else {
          simpleError(e)
        }
      }),
      fixSort = function() {
        warning("m/z sort assumption violated !")
      }
    )
    
  }
  
  ## Second stage: process the ROIs
  peaklist <- list();
  Nscantime <- length(scantime)
  lf <- length(roiList)
  # save(roiList, file = paste0("roiList_",Sys.time(),".rda"))
  ## print('\n Detecting chromatographic peaks ... \n % finished: ')
  ## lp <- -1
  
  if (.on.public.web() & !.optimize_switch){
    
    print_mes <- paste0("Detecting peaks in ", length(roiList)," regions of interest of ", filenamevalue, " ...");    
    #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
    
    .SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
    count_total_sample <- .SwapEnv$count_total_sample;
    #write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    
  } else if(.optimize_switch) {
    # do nothing
  } else {
    message("Detecting peaks in ", length(roiList),
            " regions of interest of ", filenamevalue, " ...", appendLF = FALSE)
  }
  
  roiScales = NULL;
 
  for (f in seq_len(lf)) {
    feat <- roiList[[f]]
    N <- feat$scmax - feat$scmin + 1
    peaks <- peakinfo <- NULL
    mzrange <- c(feat$mzmin, feat$mzmax)
    sccenter <- feat$scmin[1] + floor(N/2) - 1
    scrange <- c(feat$scmin, feat$scmax)
    ## scrange + noiserange, used for baseline detection and wavelet analysis
    sr <- c(max(scanrange[1], scrange[1] - max(noiserange)),
            min(scanrange[2], scrange[2] + max(noiserange)))
    
    eic <- getEICR (mz, int, scanindex, mzrange, sr)
    
    ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr)
    d <- eic$intensity;
    td <- sr[1]:sr[2];
    scan.range <- c(sr[1], sr[2]);
    ## original mzROI range
    idxs <- which(eic$scan %in% seq(scrange[1], scrange[2]));
    mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]);
    ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange)
    
    omz <- getMZR(mz, int, scanindex, mzrange, scrange, scantime);
    
    ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange)
    if (all(omz == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    od  <- mzROI.EIC$intensity
    otd <- mzROI.EIC$scan
    if (all(od == 0)) {
      warning("centWave: no peaks found in ROI.")
      next
    }
    
    ## scrange + scRangeTol, used for gauss fitting and continuous
    ## data above 1st baseline detection
    ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)],
                                                     scrange[2] + scRangeTol)
    fd <- d[match(ftd, td)]
    
    ## 1st type of baseline: statistic approach
    if (N >= 10*minPeakWidth) {
      ## in case of very long mass trace use full scan range
      ## for baseline detection
      
      noised <- getEICR(mz, int, scanindex, mzrange, scanrange)$intensity
      
      ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity
    } else {
      noised <- d
    }
    ## 90% trimmed mean as first baseline guess
    gz <- which(noised > 0);
    
    if (length(gz) < 3*minPeakWidth){
      noise <- mean(noised)
    } else {
      noise <- mean(noised[gz], trim=0.05)
    }
    
    firstBaselineCheck = TRUE
    ## any continuous data above 1st baseline ?
    if (firstBaselineCheck &
        !continuousPtsAboveThresholdR(fd, threshold = noise,
                                     num = minPtsAboveBaseLine)){
      # save(firstBaselineCheck, file = paste0("sss_",Sys.time(),".rda"))
      next
    }
    
    ## 2nd baseline estimate using not-peak-range
    lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime,
                                    threshold = noise,
                                    num = minPtsAboveBaseLine)
    # 
    # if(.optimize_switch){
    #   write.table(paste0("WK_4_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
    # }
    ## Final baseline & Noise estimate
    baseline <- max(1, min(lnoise[1], noise))
    sdnoise <- max(1, lnoise[2])
    sdthr <-  sdnoise * param$snthresh
    ## is there any data above S/N * threshold ?
    if (!(any(fd - baseline >= sdthr)))
      next
    wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh');
    
    # if(.optimize_switch){
    #   write.table(paste0("WK_5_",f,"_",Sys.time()), file = "tmp_progress.txt",row.names = FALSE,quote = FALSE,col.names = FALSE,append = TRUE,eol ="\n")
    # }
    
    if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr)))
      next
    if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise
      wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99
    localMax <- MSW.getLocalMaximumCWT(wCoefs);
    
    rL <- MSW.getRidge(localMax)
    wpeaks <- sapply(rL,
                     function(x) {
                       w <- min(seq_along(x),ncol(wCoefs))
                       any(wCoefs[x,w]- baseline >= sdthr)
                     })
    if (any(wpeaks)) {
      wpeaksidx <- which(wpeaks)
      ## check each peak in ridgeList
      for (p in seq_along(wpeaksidx)) {
        opp <- rL[[wpeaksidx[p]]]
        pp <- unique(opp)
        if (length(pp) >= 1) {
          dv <- td[pp] %in% ftd
          if (any(dv)) { ## peaks in orig. data range
            ## Final S/N check
            if (any(d[pp[dv]]- baseline >= sdthr)) {
              ## if(!is.null(roiScales)) {
              ## allow roiScales to be a numeric of length 0
              if(length(roiScales) > 0) {
                ## use given scale
                best.scale.nr <- which(scales == roiScales[[f]])
                if(best.scale.nr > length(opp))
                  best.scale.nr <- length(opp)
              } else {
                ## try to decide which scale describes the peak best
                inti <- numeric(length(opp))
                irange <- rep(ceiling(scales[1]/2), length(opp))
                for (k in seq_along(opp)) {
                  kpos <- opp[k]
                  r1 <- ifelse(kpos - irange[k] > 1,
                               kpos-irange[k], 1)
                  r2 <- ifelse(kpos + irange[k] < length(d),
                               kpos + irange[k], length(d))
                  inti[k] <- sum(d[r1:r2])
                }
                maxpi <- which.max(inti)
                if (length(maxpi) > 1) {
                  m <- wCoefs[opp[maxpi], maxpi]
                  bestcol <- which(m == max(m),
                                   arr.ind = TRUE)[2]
                  best.scale.nr <- maxpi[bestcol]
                } else  best.scale.nr <- maxpi
              }
              
              best.scale <-  scales[best.scale.nr]
              best.scale.pos <- opp[best.scale.nr]
              
              pprange <- min(pp):max(pp)
              ## maxint <- max(d[pprange])
              lwpos <- max(1, best.scale.pos - best.scale)
              rwpos <- min(best.scale.pos + best.scale, length(td))
              p1 <- match(td[lwpos], otd)[1]
              p2 <- match(td[rwpos], otd)
              p2 <- p2[length(p2)]
              if (is.na(p1)) p1 <- 1
              if (is.na(p2)) p2 <- N
              mz.value <- omz[p1:p2]
              mz.int <- od[p1:p2]
              maxint <- max(mz.int)
              
              ## re-calculate m/z value for peak range
              mzrange <- range(mz.value)
              #mzmean <- do.call(mzCenterFun,
              #                  list(mz = mz.value,
              #                       intensity = mz.int))
              mzmean <- weighted.mean(mz.value, mz.int)
              ## Compute dppm only if needed
              dppm <- NA
              if (param$verboseColumns) {
                if (length(mz.value) >= (minCentroids + 1)) {
                  dppm <- round(min(running(abs(diff(mz.value)) /
                                              (mzrange[2] *  1e-6),
                                            fun = max,
                                            width = minCentroids)))
                } else {
                  dppm <- round((mzrange[2] - mzrange[1]) /
                                  (mzrange[2] * 1e-6))
                }
              }
              peaks <- rbind(peaks,
                             c(mzmean,mzrange, ## mz
                               NA, NA, NA,     ## rt, rtmin, rtmax,
                               NA,             ## intensity (sum)
                               NA,             ## intensity (-bl)
                               maxint,         ## max intensity
                               round((maxint - baseline) / sdnoise),  ##  S/N Ratio
                               NA,             ## Gaussian RMSE
                               NA,NA,NA,       ## Gaussian Parameters
                               f,              ## ROI Position
                               dppm,           ## max. difference between the [minCentroids] peaks in ppm
                               best.scale,     ## Scale
                               td[best.scale.pos],
                               td[lwpos],
                               td[rwpos],  ## Peak positions guessed from the wavelet's (scan nr)
                               NA, NA))                    ## Peak limits (scan nr)
              peakinfo <- rbind(peakinfo,
                                c(best.scale, best.scale.nr,
                                  best.scale.pos, lwpos, rwpos))
              ## Peak positions guessed from the wavelet's
            }
          }
        }
      }  ##for
    } ## if
    ##  postprocessing
    if (!is.null(peaks)) {
      colnames(peaks) <- c(basenames, verbosenames)
      colnames(peakinfo) <- c("scale", "scaleNr", "scpos",
                              "scmin", "scmax")
      for (p in seq_len(dim(peaks)[1])) {
        ## find minima (peak boundaries), assign rt and intensity values
        if (param$integrate == 1) {
          lm <- descendMinR(wCoefs[, peakinfo[p, "scaleNr"]],
                           istart = peakinfo[p, "scpos"])
          gap <- all(d[lm[1]:lm[2]] == 0) # looks like we got stuck in a gap right in the middle of the peak
          if ((lm[1] == lm[2]) || gap)   # fall-back
            lm <- descendMinTol(
              d, startpos = c(peakinfo[p, "scmin"],
                              peakinfo[p, "scmax"]),
              maxDescOutlier)
        } else {
          lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"],
                                              peakinfo[p, "scmax"]),
                              maxDescOutlier)
        }
        ## Narrow peak rt boundaries by removing values below threshold
        lm <- .narrow_rt_boundaries(lm, d)
        lm_seq <- lm[1]:lm[2]
        pd <- d[lm_seq]
        
        peakrange <- td[lm]
        peaks[p, "rtmin"] <- scantime[peakrange[1]]
        peaks[p, "rtmax"] <- scantime[peakrange[2]]
        peaks[p, "maxo"] <- max(pd)
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        if (is.na(pwid))
          pwid <- 1
        peaks[p, "into"] <- pwid * sum(pd)
        db <- pd - baseline
        peaks[p, "intb"] <- pwid * sum(db[db > 0])
        peaks[p, "lmin"] <- lm[1]
        peaks[p, "lmax"] <- lm[2]
        
        if (param$fitgauss) {
          ## perform gaussian fits, use wavelets for inital parameters
          td_lm <- td[lm_seq]
          md <- max(pd)
          d1 <- pd / md ## normalize data for gaussian error calc.
          pgauss <- fitGauss(td_lm, pd,
                             pgauss = list(mu = peaks[p, "scpos"],
                                           sigma = peaks[p, "scmax"] -
                                             peaks[p, "scmin"],
                                           h = peaks[p, "maxo"]))
          
          
          rtime <- peaks[p, "scpos"]
          if (!any(is.na(pgauss)) && all(pgauss > 0)) {
            gtime <- td[match(round(pgauss$mu), td)]
            if (!is.na(gtime)) {
              rtime <- gtime
              peaks[p, "mu"] <- pgauss$mu
              peaks[p, "sigma"] <- pgauss$sigma
              peaks[p, "h"] <- pgauss$h
              peaks[p,"egauss"] <- sqrt(
                (1 / length(td_lm)) *
                  sum(((d1 - gauss(td_lm, pgauss$h / md,
                                   pgauss$mu, pgauss$sigma))^2)))
            }
          }
          peaks[p, "rt"] <- scantime[rtime]
          ## avoid fitting side effects
          if (peaks[p, "rt"] < peaks[p, "rtmin"])
            peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
        } else
          peaks[p, "rt"] <- scantime[peaks[p, "scpos"]]
      }
      peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime,
                                    scan.range, peaks, maxGaussOverlap)
    }
    if (!is.null(peaks)) {
      peaklist[[length(peaklist) + 1]] <- peaks
    }
  } ## f
  
  #save.image(file = paste0(length(roiList),"_",Sys.time(),".RData"));
  
  if (length(peaklist) == 0) {
    if(!.SwapEnv$.optimize_switch){
      warning("No peaks found!")
    }
    
    if (param$verboseColumns) {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames) +
                          length(verbosenames))
      colnames(nopeaks) <- c(basenames, verbosenames)
    } else {
      nopeaks <- matrix(nrow = 0, ncol = length(basenames))
      colnames(nopeaks) <- c(basenames)
    }
    
    if(.optimize_switch) {
      # do nothing
    } else {
      message(" FAIL: none found!")
    }
    return(nopeaks)
  }
  
  p <- do.call(rbind, peaklist)
  # save(p, file = paste0("p_",Sys.time(),".rda"))
  
  if (!param$verboseColumns)
    p <- p[, basenames, drop = FALSE]
  
  uorder <- order(p[, "into"], decreasing = TRUE)
  pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE])
  uindex <- rectUniqueR(pm, uorder, param$mzdiff, ydiff = -0.00001) ## allow adjacent peaks
  pr <- p[uindex, , drop = FALSE]
  
  
  if (.on.public.web() & !.optimize_switch){
    print_mes_tmp <- paste0(" OK: ", nrow(pr), " found.");    
    print_mes <- paste0(print_mes, print_mes_tmp)
    write.table(print_mes, file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
    
  } else if(.optimize_switch) {
    # do nothing
  } else {
    message(" OK: ", nrow(pr), " found.")
  }
  
  return(pr)
}

#' @title PeakPicking_Massifquant_slave
#' @description PeakPicking_Massifquant_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_Massifquant_slave <- function(x, param){
  
  # if(.on.public.web()){
  #   dyn.load(.getDynLoadPath());
  # }
  # load necessary C code for data processing
  
  if (is(x,"OnDiskMSnExp")){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- unname(MSnbase::rtime(x));
    
  } else if (is(x,"list")) { # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  mzs <- lapply(scan.set, MSnbase::mz);
  
  vals_per_spect <- lengths(mzs, FALSE);
  
  #if (any(vals_per_spect == 0))
  #  mzs <- mzs[-which(vals_per_spect == 0)];
  #  x <- x[-which(vals_per_spect == 0)]; # Cannot be so easy !
  #  warning("Found empty spectra Scan. Filtered Automatically !")
  
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  valsPerSpect = vals_per_spect;
  scantime = rt;
  
  #------------Massifquant param ------------------/
  #mz,
  #int,
  #scantime,
  #valsPerSpect,
  ppm = param$ppm;
  peakwidth = param$peakwidth;
  snthresh = param$snthresh;
  prefilter = param$prefilter;
  mzCenterFun = "wMean";
  integrate = param$integrate;
  mzdiff = param$mzdiff;
  fitgauss = param$fitgauss;
  noise = param$noise;
  
  ##### Massifquant specific paramters----|
  verboseColumns = FALSE;
  criticalValue = param$criticalValue;
  consecMissedLimit = param$consecMissedLimit;
  unions = param$unions;
  checkBack = param$checkBack;
  withWave = param$withWave;
  ##### Massifquant specific paramters----|
  
  
  #------------Massifquant param ------------------/
  
  if (!is.double(mz))
    mz <- as.double(mz)
  if (!is.double(int))
    int <- as.double(int)
  ## Fix the mzCenterFun
  mzCenterFun <- paste("mzCenter",
                       gsub(mzCenterFun, pattern = "mzCenter.",
                            replacement = "", fixed = TRUE), sep=".")
  if(mzCenterFun == "mzCenter.wMean"){
    mzCenter.wMean <- weighted.mean;
  }
  
  if (!exists(mzCenterFun, mode="function"))
    stop(" >", mzCenterFun, "< not defined !")
  
  if(.on.public.web()){
    
    print_mes_tmp <- paste0("Detecting mass traces at ", ppm, "ppm ... ");    
    #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " ");
    #print_mes <- paste0(print_mes,print_mes_tmp)
    
    .SwapEnv$count_current_sample <- count_current_sample <- .SwapEnv$count_current_sample +1;
    count_total_sample <- .SwapEnv$count_total_sample;
    write.table(count_current_sample, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    write.table(25 + count_current_sample*3/count_total_sample*25, file = "log_progress.txt",row.names = FALSE,col.names = FALSE)
    
  } else {
    message("Detecting  mass traces at ",ppm," ppm ... ", appendLF = FALSE)
  }
  
  
  flush.console()
  
  #massifquantROIs <- do_findKalmanROI(mz = mz, int = int, scantime = scantime,
  #                                    valsPerSpect = valsPerSpect,
  #                                    minIntensity = prefilter[2],
  #                                    minCentroids = peakwidth[1],
  #                                    criticalVal = criticalValue,
  #                                    consecMissedLim = consecMissedLimit,
  #                                    segs = unions, scanBack = checkBack,
  #                                    ppm = ppm)
  mzrange = c(0.0, 0.0);
  scanrange = c(1, length(scantime));
  minIntensity = prefilter[2];
  minCentroids = peakwidth[1];
  segs = unions;
  scanBack = checkBack;
  scanindex <- valueCount2ScanIndex(valsPerSpect)
  
  #scanindex <- valueCount2ScanIndex(valsPerSpect) 
  ## Call the C function.
  if (!is.integer(scanindex))
    scanindex <- as.integer(scanindex)
  
  if (!is.double(scantime))
    scantime <- as.double(scantime)

    tmp <- capture.output(
      massifquantROIs <- massifquantROIs(
        mz = mz, 
        int = int, 
        scanindex = scanindex, 
        scantime = scantime,
        mzrange = mzrange, 
        scanrange = scanrange,
        minIntensity = minIntensity,
        minCentroids = minCentroids, 
        consecMissedLim = consecMissedLimit,
        ppm = ppm, 
        criticalVal = criticalValue, 
        segs = segs,
        scanBack = scanBack))

  #if (withWave) {
  if (FALSE) {
    # featlist <- do_findChromPeaks_centWave(mz = mz, int = int,
    #                                        scantime = scantime,
    #                                        valsPerSpect = valsPerSpect,
    #                                        ppm = ppm, peakwidth = peakwidth,
    #                                        snthresh = snthresh,
    #                                        prefilter = prefilter,
    #                                        mzCenterFun = mzCenterFun,
    #                                        integrate = integrate,
    #                                        mzdiff = mzdiff,
    #                                        fitgauss = fitgauss,
    #                                        noise = noise,
    #                                        verboseColumns = verboseColumns,
    #                                        roiList = massifquantROIs)
  } else {
    ## Get index vector for C calls
    scanindex <- valueCount2ScanIndex(valsPerSpect);
    basenames <- c("mz","mzmin","mzmax","rtmin","rtmax","rt", "into");
    
    if (length(massifquantROIs) == 0) {
      if(!.SwapEnv$.optimize_switch){
        warning("\nNo peaks found!")
      }
      nopeaks <- matrix(nrow=0, ncol=length(basenames))
      colnames(nopeaks) <- basenames
      return(nopeaks)
    }
    
    ## Get the max intensity for each peak.
    maxo <- lapply(massifquantROIs, function(z) {
      raw <- .rawMat(mz = mz, int = int, scantime = scantime,
                     valsPerSpect = valsPerSpect,
                     mzrange = c(z$mzmin, z$mzmax),
                     scanrange = c(z$scmin, z$scmax))
      max(raw[, 3])
    })
    
    ## p <- t(sapply(massifquantROIs, unlist))
    p <- do.call(rbind, lapply(massifquantROIs, unlist, use.names = FALSE))
    colnames(p) <- basenames
    p <- cbind(p, maxo = unlist(maxo))
    
    #calculate median index
    p[, "rt"] <- as.integer(p[, "rtmin"] + ( (p[, "rt"] + 1) / 2 ) - 1)
    #convert from index into actual time
    p[, "rtmin"] <- scantime[p[, "rtmin"]]
    p[, "rtmax"] <- scantime[p[, "rtmax"]]
    p[, "rt"] <- scantime[p[, "rt"]]
    
    uorder <- order(p[, "into"], decreasing = TRUE)
    pm <- as.matrix(p[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                      drop = FALSE])
    uindex <- rectUniqueR(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks;
    featlist <- p[uindex, , drop = FALSE]
    #message(" ", dim(featlist)[1]," Peaks.");
    
    .optimize_switch <- .SwapEnv$.optimize_switch;
    if(is.null(.optimize_switch)){
      .optimize_switch <- FALSE;
    }
    
    if(.on.public.web() & !.optimize_switch){
      
      print_mes_tmp2 <- paste0(" OK: ", dim(featlist)[1], " Peaks found.");
      print_mes <- paste0(print_mes_tmp,print_mes_tmp2);
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
      
    } else {
      
      message("OK")
      
    }
    
    return(featlist)
  }
  
  return(featlist)
}

#' @title PeakPicking_MatchedFilter_slave
#' @description PeakPicking_MatchedFilter_slave
#' @param x ms objects
#' @param param parameters set for processing
#' @noRd
#' @importFrom stats deriv3
#' @importFrom stats fft
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
PeakPicking_MatchedFilter_slave <- function(x,param){
  
  # if(.on.public.web()){
  #   dyn.load(.getDynLoadPath());
  # }
  
  if (is(x,"OnDiskMSnExp")){ # for raw data processing
    
    scan.set <- MSnbase::spectra(x, BPPARAM = SerialParam());
    rt <- MSnbase::rtime(x);
    
  } else if (is(x, "list")){ # for parameters optimization
    
    scan.set <- x;
    rt <- unlist(lapply(x,  MSnbase::rtime), use.names = FALSE);
    
  }
  
  ## 1. data & Param preparation
  mzs <- lapply(scan.set, MSnbase::mz)
  valsPerSpect <- lengths(mzs, FALSE)
  mz = unlist(mzs, use.names = FALSE);
  int = unlist(lapply(scan.set, MSnbase::intensity), use.names = FALSE);
  scantime = rt;
  mrange <- range(mz[mz > 0]);
  binSize <- param$peakBinSize;
  mass <- seq(floor(mrange[1] / binSize) * binSize,
              ceiling(mrange[2] / binSize) * binSize,
              by = binSize)
  impute <- param$impute;
  baseValue <- param$baseValue;
  distance <- param$distance;
  steps <- param$steps;
  sigma <- param$sigma;
  snthresh <-param$snthresh;
  max <- param$max;
  mzdiff <- param$mzdiff;
  index <- param$index;
  
  
  ## 2. Binning the data.
  ## Create and translate settings for binYonXR
  
  toIdx <- cumsum(valsPerSpect)
  fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L)
  shiftBy = TRUE
  
  binFromX <- min(mass)
  binToX <- max(mass)
  ## binSize <- (binToX - binFromX) / (length(mass) - 1)
  ## brks <- seq(binFromX - binSize/2, binToX + binSize/2, by = binSize)
  
  brks <- breaks_on_nBinsR(fromX = binFromX, toX = binToX,
                          nBins = length(mass), shiftByHalfBinSize = TRUE)
  
  binRes <- binYonXR(mz, int,
                    breaks = brks,
                    fromIdx = fromIdx,
                    toIdx = toIdx,
                    baseValue = ifelse(impute == "none", yes = 0, no = NA),
                    sortedX = TRUE,
                    returnIndex = TRUE
  )
  
  
  if (length(toIdx) == 1)
    binRes <- list(binRes)
  bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index)))
  bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1]
  ## Missing value imputation
  if (missing(baseValue))
    baseValue <- numeric()
  if (length(baseValue) == 0)
    baseValue <- min(int, na.rm = TRUE) / 2
  if (missing(distance))
    distance <- numeric()
  if (length(distance) == 0)
    distance <- floor(0.075 / bin_size)
  
  binVals <- lapply(binRes, function(z) {
    return(imputeLinInterpol(z$y, method = impute, distance = distance,
                             noInterpolAtEnds = TRUE,
                             baseValue = baseValue))
  })
  
  buf <- do.call(cbind, binVals)
  
  bufidx <- 1L:length(mass)
  lookahead <- steps-1
  lookbehind <- 1
  
  N <- nextn(length(scantime))
  xrange <- range(scantime)
  x <- c(0:(N/2), -(ceiling(N/2-1)):-1) *
    (xrange[2]-xrange[1])/(length(scantime)-1)
  
  filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi)) *
                              exp(-x^2/(2*sigma^2)), "x")), "hessian")
  filt <- filt/sqrt(sum(filt^2))
  filt <- fft(filt, inverse = TRUE)/length(filt)
  
  cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf",
              "maxo", "maxf", "i", "sn")
  num <- 0
  ResList <- list()
  
  ## Can not do much here, lapply/apply won't work because of the 'steps' parameter.
  ## That's looping through the masses, i.e. rows of the profile matrix.
  for (i in seq(length = length(mass)-steps+1)) {
    ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE]
    ysums <- colMaxR(ymat)
    yfilt <- filtfft(ysums, filt)
    gmax <- max(yfilt)
    for (j in seq(length = max)) {
      maxy <- which.max(yfilt)
      noise <- mean(ysums[ysums > 0])
      ##noise <- mean(yfilt[yfilt >= 0])
      sn <- yfilt[maxy]/noise
      if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) {
        peakrange <- DescendZeroR(yfilt, maxy)
        intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE]
        mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)],
                                  peakrange[1]:peakrange[2]]],
                        nrow = steps)
        which.intMax <- which.colMax(intmat)
        mzmat <- mzmat[which.intMax]
        if (all(is.na(mzmat))) {
          yfilt[peakrange[1]:peakrange[2]] <- 0
          next
        }
        mzrange <- range(mzmat, na.rm = TRUE)
        massmean <- weighted.mean(mzmat, intmat[which.intMax],
                                  na.rm = TRUE)
        ## This case (the only non-na m/z had intensity 0) was reported
        ## by Gregory Alan Barding "binlin processing"
        if(any(is.na(massmean))) {
          massmean <- mean(mzmat, na.rm = TRUE)
        }
        pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) /
          (peakrange[2] - peakrange[1])
        into <- pwid*sum(ysums[peakrange[1]:peakrange[2]])
        intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]])
        maxo <- max(ysums[peakrange[1]:peakrange[2]])
        maxf <- yfilt[maxy]
        
        yfilt[peakrange[1]:peakrange[2]] <- 0
        num <- num + 1
        ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy,
                            peakrange, into, intf, maxo, maxf, j, sn)
      } else
        break
    }
  }
  
  
  if (length(ResList) == 0) {
    rmat <- matrix(nrow = 0, ncol = length(cnames))
    colnames(rmat) <- cnames
    return(rmat)
  }
  rmat <- do.call(rbind, ResList)
  if (is.null(dim(rmat))) {
    rmat <- matrix(rmat, nrow = 1)
  }
  colnames(rmat) <- cnames
  max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize)
  if (index){
    mzdiff <- mzdiff/binSize
  }  else {
    rmat[, "rt"] <- scantime[rmat[, "rt"]]
    rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]]
    rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]]
  }
  ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak
  ## and report that.
  uorder <- order(rmat[, "into"], decreasing = TRUE)
  uindex <- rectUniqueR(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                            drop = FALSE],
                       uorder, mzdiff)
  rmat <- rmat[uindex,,drop = FALSE]
  
  return(rmat)
  
}

#' @title PerformPeakGrouping
#' @description PerformPeakGrouping
#' @param mSet mSet object.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
PerformPeakGrouping<-function(mSet){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 200; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }

  param <- mSet@params;
  
  ## 1. Verify & Extract Information-------
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  if(length(mSet@peakRTcorrection)==0){
    
    if(length(mSet@peakpicking) == 0){
      MessageOutput("ERROR: No Chromatographic peaks detected, please \"PerformPeakPicking\" first !")
    } else {
      peaks_0 <- mSet@peakpicking$chromPeaks;
    }
    
  } else {
    peaks_0 <- mSet@peakRTcorrection$chromPeaks;
  }
  
  peaks <- OptiChromPeaks(peaks_0);
  
  peaks <- cbind(peaks_0[, c("mz", "rt", "sample", "into"), drop = FALSE],
                 index = seq_len(nrow(peaks_0)))
  
  if (length(mSet@rawOnDisk) != 0) {
    
    sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_group"]]);
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
    }
    
  } else if (length(mSet@rawInMemory) != 0) {
    
    sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_group"]]);   
    if (identical(sample_group,character(0))){
      sample_group<-as.character(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    }
    
  } else {
    stop("Exceptions occurs during data import process. Please check your data carefully!")
  }
  
  sampleGroupNames <- unique(sample_group)
  sampleGroupTable <- table(sample_group)
  nSampleGroups <- length(sampleGroupTable)
  
  # if BLANK Substraction has already been performed, filter out all blank features here
  if(!is.null(mSet@peakRTcorrection[["BlankSubstracted"]])){
    if(mSet@peakRTcorrection[["BlankSubstracted"]]){
      peaks <- peaks[peaks[,"sample"] != 0,];
      sample_group <- sample_group[sample_group != "BLANK"]
      sampleGroupNames <- sampleGroupNames[sampleGroupNames != "BLANK"]
      sampleGroupTable <- sampleGroupTable[-which(names(sampleGroupTable) == "BLANK")]
      nSampleGroups <- length(sampleGroupTable)
    }
  }
  
  ## 2. Order peaks matrix by mz-------
  peaks <- peaks[order(peaks[, "mz"]), , drop = FALSE]
  rownames(peaks) <- NULL
  rtRange <- range(peaks[, "rt"])
  
  ## 3. Define the mass slices and the index in the peaks matrix with an mz-------
  mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + param$binSize,
              by = param$binSize / 2)
  masspos <- findEqualGreaterMR(peaks[, "mz"], mass)
  
  densFrom <- rtRange[1] - 3 * param$bw
  densTo <- rtRange[2] + 3 * param$bw
  
  ## 4. Increase the number of sampling points for the density distribution.-------
  densN <- max(512, 2 * 2^(ceiling(log2(diff(rtRange) / (param$bw / 2)))))
  endIdx <- 0
  
  if (!.optimize_switch){
    print_mes <- paste0("Total of ", length(mass) - 1, " slices detected for processing... ");
  }
  
  resL <- vector("list", (length(mass) - 2))
  
  for (i in seq_len(length(mass)-2)) {
    ## That's identifying overlapping mz slices.
    startIdx <- masspos[i]
    endIdx <- masspos[i + 2] - 1
    if (endIdx - startIdx < 0)
      next
    resL[[i]] <- Densitygrouping_slave(peaks[startIdx:endIdx, , drop = FALSE],
                                       bw = param$bw, densFrom = densFrom,
                                       densTo = densTo, densN = densN,
                                       sampleGroups = sample_group,
                                       sampleGroupTable = sampleGroupTable,
                                       minFraction = param$minFraction,
                                       minSamples = param$minSamples,
                                       maxFeatures = param$maxFeatures)
  }
  
  if (!.optimize_switch){
    print_mes_tmp <- paste("Done ! \nGoing to the next step...");
    MessageOutput(mes = paste0(print_mes,print_mes_tmp), ecol = "\n", NULL)
  }
  
  res <- do.call(rbind, resL)
  
  if (nrow(res)) {
    ## Remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(
      as.matrix(res[, (match("npeaks", colnames(res)) +1):(ncol(res) -1),
                    drop = FALSE]))
    uorder <- order(-numsamp, res[, "npeaks"])
    
    uindex <- rectUniqueR(
      as.matrix(res[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                    drop = FALSE]), uorder)
    res <- res[uindex, , drop = FALSE]
    rownames(res) <- NULL
  }
  
  df <- S4Vectors::DataFrame(res)
  rownames(df) <- sprintf(paste0("FT", "%0", ceiling(log10(nrow(df) + 1L)), "d"),
                          seq(from = 1, length.out = nrow(df)))
  
  FeatureGroupTable <- df;
  n <- length(mSet@peakgrouping) + 1;
  mSet@peakgrouping[[n]] <- FeatureGroupTable;

  return(mSet)
}

#' @title Densitygrouping_slave
#' @description Densitygrouping_slave
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @noRd
Densitygrouping_slave <- function(x, bw, densFrom, densTo, densN, sampleGroups,
                                  sampleGroupTable, minFraction,
                                  minSamples, maxFeatures) {
  den <- density(x[, "rt"], bw = bw, from = densFrom, to = densTo,
                 n = densN)
  maxden <- max(den$y)
  deny <- den$y
  sampleGroupNames <- names(sampleGroupTable)
  nSampleGroups <- length(sampleGroupNames)
  col_nms <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
               "npeaks", sampleGroupNames)
  res_mat <- matrix(nrow = 0, ncol = length(col_nms),
                    dimnames = list(character(), col_nms))
  res_idx <- list()
  while (deny[maxy <- which.max(deny)] > maxden / 20 && nrow(res_mat) <
         maxFeatures) {
    grange <- descendMinR(deny, maxy)
    deny[grange[1]:grange[2]] <- 0
    gidx <- which(x[,"rt"] >= den$x[grange[1]] &
                    x[,"rt"] <= den$x[grange[2]])
    ## Determine the sample group of the samples in which the peaks
    ## were detected and check if they correspond to the required limits.
    tt <- table(sampleGroups[unique(x[gidx, "sample"])])
    if (!any(tt / sampleGroupTable[names(tt)] >= minFraction &
             tt >= minSamples))
      next
    gcount <- rep(0, length(sampleGroupNames))
    names(gcount) <- sampleGroupNames
    gcount[names(tt)] <- as.numeric(tt)
    res_mat <- rbind(res_mat,
                     c(median(x[gidx, "mz"]),
                       range(x[gidx, "mz"]),
                       median(x[gidx, "rt"]),
                       range(x[gidx, "rt"]),
                       length(gidx),
                       gcount)
    )
    res_idx <- c(res_idx, list(unname(sort(x[gidx, "index"]))))
  }
  
  res <- as.data.frame(res_mat)
  res$peakidx <- res_idx
  res
}

#' @title PerformPeakAlignment
#' @description PerformPeakAlignment
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @export
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @return will return an mSet object with peak aligned done
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' mSet <- PerformPeakAlignment(mSet);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakAlignment<-function(mSet){
  ## 1~4. Peak Grouping ---------
  mSet<-PerformPeakGrouping(mSet);
  
  ## Insert blank substraction here
  mSet <- PerformBlankSubstraction(mSet);
  
  ## 5. Perform RT correction ---------
  mSet<-PerformRTcorrection(mSet)
  
  ## 6~9. Peak Grouping Again ---------
  mSet<-PerformPeakGrouping(mSet)
  
  ## 10. Return ------
  return(mSet)
}

PerformRTcorrection <- function(mSet){
  
  ## 5. Adjust Retention Time-------
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }

  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  #TODO: add a function to verify the param in mSet
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL)
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !")
    }
  }
  
  if(length(mSet@peakgrouping) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No grouped peaks found, please \"PerformPeakGrouping\" first !", NULL, NULL)
    } else {
      stop("No grouped peaks found, please \"PerformPeakGrouping\" first !")
    }
  }
  
  param <- mSet@params;
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if (.on.public.web() & !.optimize_switch){
    MessageOutput(mes = paste("Retention time correction is running."),
                  ecol = "\n",
                  progress = 60)
  }
  
  if (param[["RT_method"]] == "obiwarp"){
    if (!.optimize_switch){
      MessageOutput(mes = paste("obiwarp is used for retention time correction. \n"),
                    ecol = "",
                    progress = NULL)
    }
    
    mSet <- tryCatch(adjustRtime_obiwarp(mSet, param, msLevel = 1L), error = function(e){e});
  } else {
    if (!.optimize_switch){
      MessageOutput(mes = paste("PeakGroup -- loess is used for retention time correction."),
                    ecol = "",
                    progress = NULL)
    }
    
    mSet <- tryCatch(adjustRtime_peakGroup(mSet, param, msLevel = 1L), error = function(e){e});
  }
  
  if (is(mSet,"simpleError") & !.optimize_switch){
   
    MessageOutput(
      mes = paste0("<font color=\"red\">","\nERROR:",mSet$message,"</font>"),
      ecol = "\n",
      progress = NULL
    )
    stop("EXCEPTION POINT CODE: RT1");
  }
  
  #
  if (!.optimize_switch){
    MessageOutput(
      mes = "Done !",
      ecol = "\n",
      progress = 88
    )
  }
  # 
  # if(.on.public.web()){
  #   save(mSet, file = "mSet.rda");
  # }
  # 
  return(mSet);
}

adjustRtime_peakGroup <- function(mSet, param, msLevel = 1L) {
  
  # 1). Group Matrix-------
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  # 1.1). Subset selection
  
  if (length(mSet@rawOnDisk) != 0){
    if(param$BlankSub){
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      subs <- subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
      specdata <- mSet@rawOnDisk;
      Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
    } else {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      specdata <- mSet@rawOnDisk;
      Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");
    }
   
    if (length(Subset_QC) < 30){
      Subset <- integer(0);
    } else {
      Subset <- Subset_QC;
    }
    
    peaks_0 <- mSet@peakpicking$chromPeaks;
    
  } else {
    subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    specdata <- mSet@rawInMemory;
    Subset_QC <- which(mSet@rawInMemory@phenoData@data[["sample_group"]]=="QC");
    # files_list <- mSet[[format]]@processingData@files;
    
    if (length(Subset_QC) < 30){
      Subset <- integer(0);
    } else {
      Subset <- Subset_QC;
    }
    
    peaks_0 <- mSet@peakpicking$chromPeaks;
  }
  
  if(!identical(Subset, integer(0))){
    subs <- Subset;
  }
  
  nSamples <- length(subs)
  
  subset_names <- original_names <- mSet@peakpicking$chromPeakData@rownames;
  n <- length(mSet@peakgrouping);
  pidx <- mSet@peakgrouping[[n]]$peakidx; # RT correction based on latest grouping results
  
  mSet@peakgrouping[[n]]$peakidx <- lapply(pidx, function(z) {
    idx <- base::match(original_names[z], subset_names)
    idx[!is.na(idx)]
  })
  peakIndex_0 <- mSet@peakgrouping[[n]][lengths(mSet@peakgrouping[[n]]$peakidx) > 0, ]
  peakIndex <- .peakIndex(peakIndex_0)
  
  
  pkGrpMat <- .getPeakGroupsRtMatrix(peaks = peaks_0,
                                     peakIndex = peakIndex,
                                     sampleIndex = subs,
                                     missingSample = nSamples - (nSamples * param$minFraction),
                                     extraPeaks = param$extraPeaks
  )
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if (is.null(pkGrpMat) & !.optimize_switch){
    MessageOutput(mes = paste0("<font color=\"red\">","\nERROR:","No enough peaks detected, please adjust your parameters or use other Peak/Alignment method","</font>"),
                  ecol = "\n",
                  progress = 65)
  }
  colnames(pkGrpMat) <- specdata@phenoData@data[["sample_name"]][subs]
  
  if (!.optimize_switch){
    MessageOutput(mes =paste("...."), "\n", 65)
  }
  
  # 2). RT adjustment-------
  if(param$BlankSub){
    rtime_0 <- rtime(specdata)
    
    rtime0 <- split(rtime_0, fromFile(specdata))
    tmp <- rtime0[subs]
    rtime <- vector("list", length(fileNames(specdata)))
    names(rtime) <- as.character(seq_along(rtime))
    rtime[as.numeric(names(tmp))] <- tmp
    rtime <- rtime[subs]
    names(rtime) <- seq_along(rtime)
    
    peaks_1 <- peaks_0;
    peaks_1[!(peaks_1[, "sample"] %in% subs), "sample"] <- 0;
    #### "sample" == 0 means this samples are considered as blank and will be excluded for future!
    j <- 1;
    for(jj in unique(peaks_1[,"sample"])){
      if(jj == 0) next;
      peaks_1[peaks_1[,"sample"] == jj, "sample"] <- j;
      j <- j + 1
    }
  } else {
    rtime_0 <- rtime(specdata)
    
    tmp <- split(rtime_0, fromFile(specdata));
    rtime <- vector("list", length(fileNames(specdata)));
    names(rtime) <- as.character(seq_along(rtime));
    rtime[as.numeric(names(tmp))] <- tmp;

    peaks_1 <- peaks_0;
  }

  
  res <- RT.Adjust_peakGroup(peaks_1,
                             peakIndex = peakIndex_0$peakidx,
                             rtime = rtime,
                             minFraction = param$minFraction,
                             extraPeaks = param$extraPeaks,
                             smooth = param$smooth,
                             span = param$span,
                             family = param$family,
                             peakGroupsMatrix = pkGrpMat,
                             subset = Subset,
                             subsetAdjust = param$subsetAdjust)

  
  if (!.optimize_switch){
    MessageOutput(mes ="Applying retention time adjustment to the identified chromatographic peaks ... ",
                  "", 
                  66)
  }
  
  if(param$BlankSub){
    #names(rtime) <- names(res) <- subs;
    #rtime0[subs] <- res;
    
    #ChromPeaks <- mSet@peakpicking$chromPeaks;
    #ChromPeaks <- ChromPeaks[ChromPeaks[,"sample"] %in% subs,]
    ChromPeaks <- peaks_1;
    fts <- .applyRtAdjToChromPeaks(ChromPeaks,
                                   rtraw = rtime,
                                   rtadj = res);
    fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],]
    mSet@peakRTcorrection$BlankSubstracted <- TRUE;
  } else {
    fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
                                   rtraw = rtime,
                                   rtadj = res)
    mSet@peakRTcorrection$BlankSubstracted <- FALSE;
  }
  
  mSet@peakRTcorrection[["pkGrpMat_Raw"]] <- pkGrpMat;
  mSet@peakRTcorrection[["chromPeaks"]] <- fts;
  mSet@peakRTcorrection[["adjustedRT"]] <- res;
  
  return(mSet);
}

#' @importFrom stats approxfun
adjustRtime_obiwarp <- function(mSet, param, msLevel = 1L) {
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  ## Filter for MS level, perform adjustment and if the object
  ## contains spectra from other MS levels too, adjust all raw
  ## rts based on the difference between adjusted and raw rts.
  
  if (length(mSet@rawOnDisk) != 0){
    if (param$BlankSub) {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
      subs <-
        subs[mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK"]
    } else {
      subs <- seq_along(mSet@rawOnDisk@phenoData@data[["sample_name"]]);
    }
    specdata <- mSet@rawOnDisk;
    
    object_sub <- MSnbase::filterMsLevel(
      MSnbase::selectFeatureData(specdata,
                                 fcol = c(.MSnExpReqFvarLabels,
                                          "centroided")), msLevel. = 1);

  } else {
    subs <- seq_along(mSet@rawInMemory@phenoData@data[["sample_name"]]);
    specdata <- mSet@rawInMemory;
    object_sub <- MSnbase::filterMsLevel(specdata, msLevel. = 1); # No need to filter!
  }
  
  #object <- mSet@rawOnDisk;

  if (length(object_sub) == 0)
    stop("No spectra of MS level ", msLevel, " present")
  
  res <- mSet.obiwarp(mSet, object_sub, param = param)
  
  ## Adjust the retention time for spectra of all MS levels, if
  ## if there are some other than msLevel (issue #214).
  if (length(unique(msLevel(specdata))) !=
      length(unique(msLevel(object_sub)))) {
    
    if (!.optimize_switch){
      MessageOutput(
        mes = paste0("Apply retention time correction performed on MS",
                     msLevel, " to spectra from all MS levels - obiwarp"),
        ecol = "",
        progress = 66
      )
    } 

    ## I need raw and adjusted rt for the adjusted spectra
    ## and the raw rt of all.
    rtime_all <- split(rtime(specdata), fromFile(specdata))
    rtime_sub <- split(rtime(object_sub), fromFile(object_sub))
    ## For loop is faster than lapply. No sense to do parallel
    for (i in seq_along(rtime_all)) {
      n_vals <- length(rtime_sub[[i]])
      idx_below <- which(rtime_all[[i]] < rtime_sub[[i]][1])
      if (length(idx_below))
        vals_below <- rtime_all[[i]][idx_below]
      idx_above <- which(rtime_all[[i]] >
                           rtime_sub[[i]][n_vals])
      if (length(idx_above))
        vals_above <- rtime_all[[i]][idx_above]
      ## Adjust the retention time. Note: this should be
      ## OK even if values are not sorted.
      adj_fun <- approxfun(x = rtime_sub[[i]], y = res[[i]])
      rtime_all[[i]] <- adj_fun(rtime_all[[i]])
      ## Adjust rtime < smallest adjusted rtime.
      if (length(idx_below)) {
        rtime_all[[i]][idx_below] <- vals_below +
          res[[i]][1] - rtime_sub[[i]][1]
      }
      ## Adjust rtime > largest adjusted rtime
      if (length(idx_above)) {
        rtime_all[[i]][idx_above] <- vals_above +
          res[[i]][n_vals] - rtime_sub[[i]][n_vals]
      }
    }
    res <- rtime_all
  }
  res <- unlist(res, use.names = FALSE)
  sNames <- unlist(split(featureNames(specdata), fromFile(specdata)),
                   use.names = FALSE)
  names(res) <- sNames
  res <- res[featureNames(specdata)]
  #res;
  
  adjustedRtime_res <- unname(split(res, fromFile(specdata)));
  #mSet[["msFeatureData"]][["adjustedRT"]] <- adjustedRtime_res;
  
  rtime_0 <- rtime(specdata)
  tmp <- split(rtime_0, fromFile(specdata))
  rtime <- vector("list", length(fileNames(specdata)))
  names(rtime) <- as.character(seq_along(rtime))
  rtime[as.numeric(names(tmp))] <- tmp
  
  fts <- .applyRtAdjToChromPeaks(mSet@peakpicking$chromPeaks,
                                 rtraw = rtime,
                                 rtadj = adjustedRtime_res);
  
  if(param$BlankSub){
    fts[!(fts[, "sample"] %in% subs), "sample"] <- 0;
    #### "sample" == 0 means this samples are considered as blank and will be excluded for future!
    j <- 1;
    for(jj in unique(fts[,"sample"])){
      if(jj == 0) next;
      fts[fts[,"sample"] == jj, "sample"] <- j;
      j <- j + 1
    }
    fts <- fts[!mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]],];
    adjustedRtime_res <- adjustedRtime_res[subs];
    mSet@peakRTcorrection$BlankSubstracted <- TRUE;
  } else {
    mSet@peakRTcorrection$BlankSubstracted <- FALSE;
  }
  
  mSet@peakRTcorrection[["chromPeaks"]] <- fts;
  mSet@peakRTcorrection[["adjustedRT"]] <- adjustedRtime_res;
  
  return(mSet);
}

#' @title RT.Adjust_peakGroup
#' @description RT.Adjust_peakGroup
#' @param peaks peaks
#' @param peakIndex peakIndex
#' @param rtime rtime
#' @param minFraction minFraction
#' @param extraPeaks extraPeaks
#' @param smooth smooth
#' @param span span
#' @param family family
#' @param peakGroupsMatrix peakGroupsMatrix
#' @param subset subset
#' @param subsetAdjust subsetAdjust
#' @importFrom stats approx
#' @importFrom stats lsfit
#' @importFrom stats loess
#' @importFrom stats predict
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#'
RT.Adjust_peakGroup <-
  function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1,
           smooth = c("loess", "linear"), span = 0.2,
           family = c("gaussian", "symmetric"),
           peakGroupsMatrix = matrix(ncol = 0, nrow = 0),
           subset = integer(),
           subsetAdjust = c("average", "previous"))  {
   
    if(!exists(".SwapEnv")){
      .SwapEnv <<- new.env(parent = .GlobalEnv);
      .SwapEnv$.optimize_switch <- FALSE;
      .SwapEnv$count_current_sample <- 0;
      .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
      .SwapEnv$envir <- new.env();
    }
    
    .optimize_switch <- .SwapEnv$.optimize_switch;
    if(is.null(.optimize_switch)){
      .optimize_switch <- FALSE;
    }
    
    subsetAdjust <- match.arg(subsetAdjust)
    ## Check input.
    if (missing(peaks) | missing(peakIndex) | missing(rtime))
      stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!")
    smooth <- match.arg(smooth)
    family <- match.arg(family)
    ## minFraction
    if (any(minFraction > 1) | any(minFraction < 0))
      stop("'minFraction' has to be between 0 and 1!")
    ## Check peakIndex:
    if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks)))))
      stop("Some indices listed in 'peakIndex' are outside of ",
           "1:nrow(peaks)!")
    ## Check rtime:
    if (!is.list(rtime))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE)))
      stop("'rtime' should be a list of numeric vectors with the retention ",
           "times of the spectra per sample!")
    if (length(rtime) != max(peaks[, "sample"]))
      stop("The length of 'rtime' does not match with the total number of ",
           "samples according to the 'peaks' matrix!")
    total_samples <- length(rtime)
    
    if (length(subset)) {
      if (!is.numeric(subset))
        stop("If provided, 'subset' is expected to be an integer")
      if (!all(subset %in% seq_len(total_samples)))
        stop("One or more indices in 'subset' are out of range.")
      if (length(subset) < 2)
        stop("Length of 'subset' too small: minimum required samples for ",
             "alignment is 2.")
    } else {
      subset <- seq_len(total_samples)
    }
    
    ## Translate minFraction to number of allowed missing samples.
    nSamples <- length(subset)
    missingSample <- nSamples - (nSamples * minFraction)
    ## Remove peaks not present in "subset" from the peakIndex
    peaks_in_subset <- which(peaks[, "sample"] %in% subset)
    peakIndex <- lapply(peakIndex, function(z) z[z %in% peaks_in_subset])
    ## Check if we've got a valid peakGroupsMatrix
    ## o Same number of samples.
    ## o range of rt values is within the rtime.
    if (nrow(peakGroupsMatrix)) {
      if (ncol(peakGroupsMatrix) != nSamples)
        stop("'peakGroupsMatrix' has to have the same number of columns ",
             "as there are samples!")
      pg_range <- range(peakGroupsMatrix, na.rm = TRUE)
      rt_range <- range(rtime)
      if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2]))
        stop("The retention times in 'peakGroupsMatrix' have to be within",
             " the retention time range of the experiment!")
      rt <- peakGroupsMatrix
    } else
      rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, subset,
                                   missingSample, extraPeaks)
    if (ncol(rt) != length(subset))
      stop("Length of 'subset' and number of columns of the peak group ",
           "matrix do not match.")
   
    if (length(rt) == 0)
      stop("No peak groups found in the data for the provided settings")
    
    if (.on.public.web() & !.optimize_switch){
      
      print_mes <- paste("Performing retention time correction using ", nrow(rt)," peak groups.");    
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
      
    } else if(.optimize_switch) {
      #Do nothing
    } else {
      message("Performing retention time correction using ", nrow(rt)," peak groups.")
    }
    
    ## Calculate the deviation of each peak group in each sample from its
    ## median
    rtdev <- rt - apply(rt, 1, median, na.rm = TRUE)
    
    if (smooth == "loess") {
      mingroups <- min(colSums(!is.na(rt)))
      if (mingroups < 4) {
        smooth <- "linear"
        if(!.SwapEnv$.optimize_switch){
        warning("Too few peak groups for 'loess', reverting to linear",
                " method")
        }
      } else if (mingroups * span < 4) {
        span <- 4 / mingroups
        if(!.SwapEnv$.optimize_switch){
        warning("Span too small for 'loess' and the available number of ",
                "peak groups, resetting to ", round(span, 2))
        }
      }
    }
    
    rtdevsmo <- vector("list", nSamples)
    
    ## Code for checking to see if retention time correction is overcorrecting
    rtdevrange <- range(rtdev, na.rm = TRUE)
    warn.overcorrect <- FALSE
    warn.tweak.rt <- FALSE
    
    rtime_adj <- rtime
    ## Adjust samples in subset.
    for (i in seq_along(subset)) {
      i_all <- subset[i]              # Index of sample in whole dataset.
      pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i]))
      
      ## order the data.frame such that rt and rtdev are increasingly ordered.
      pk_idx <- order(pts$rt, pts$rtdev)
      pts <- pts[pk_idx, ]
      if (smooth == "loess") {
        lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span,
                                     degree = 1, family = family))
        
        rtdevsmo[[i]] <- na.flatfill(
          predict(lo, data.frame(rt = rtime[[i_all]])))
        ## Remove singularities from the loess function
        rtdevsmo[[i]][abs(rtdevsmo[[i]]) >
                        quantile(abs(rtdevsmo[[i]]), 0.9,
                                 na.rm = TRUE) * 2] <- NA
        if (length(naidx <- which(is.na(rtdevsmo[[i]]))))
          rtdevsmo[[i]][naidx] <- suppressWarnings(
            approx(na.omit(data.frame(rtime[[i_all]], rtdevsmo[[i]])),
                   xout = rtime[[i_all]][naidx], rule = 2)$y
          )
        
        ## Check if there are adjusted retention times that are not ordered
        ## increasingly. If there are, search for each first unordered rt
        ## the next rt that is larger and linearly interpolate the values
        ## in between (see issue #146 for an illustration).
        while (length(decidx <- which(diff(rtime[[i_all]] - rtdevsmo[[i]]) < 0))) {
          warn.tweak.rt <- TRUE  ## Warn that we had to tweak the rts.
          rtadj <- rtime[[i_all]] - rtdevsmo[[i]]
          rtadj_start <- rtadj[decidx[1]] ## start interpolating from here
          ## Define the
          next_larger <- which(rtadj > rtadj[decidx[1]])
          if (length(next_larger) == 0) {
            ## Fix if there is no larger adjusted rt up to the end.
            next_larger <- length(rtadj) + 1
            rtadj_end <- rtadj_start
          } else {
            next_larger <- min(next_larger)
            rtadj_end <- rtadj[next_larger]
          }
          ## linearly interpolate the values in between.
          adj_idxs <- (decidx[1] + 1):(next_larger - 1)
          incr <- (rtadj_end - rtadj_start) / length(adj_idxs)
          rtdevsmo[[i]][adj_idxs] <- rtime[[i_all]][adj_idxs] -
            (rtadj_start + (seq_along(adj_idxs)) * incr)
        }
        
        rtdevsmorange <- range(rtdevsmo[[i]]);
        
        divRes <- unique((rtdevsmorange / rtdevrange > 2));
        if(!is.na(divRes)[1]){
          if (any(divRes))
            warn.overcorrect <- TRUE
        }
        
      } else {
        if (nrow(pts) < 2 & !.optimize_switch) {
          stop("No enough peak groups even for linear smoothing available! Please check your data quality or decrease \"bw\" util to 1.")
        }
        ## Use lm instead?
        fit <- suppressWarnings(lsfit(pts$rt, pts$rtdev));
        rtdevsmo[[i]] <- rtime[[i_all]] * fit$coef[2] + fit$coef[1];
        ptsrange <- range(pts$rt);
        minidx <- rtime[[i_all]] < ptsrange[1];
        maxidx <- rtime[[i_all]] > ptsrange[2];
        rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)];
        rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)];
      }
      ## Finally applying the correction
      rtime_adj[[i_all]] <- rtime[[i_all]] - rtdevsmo[[i]]
    }
    
    ## Adjust the remaining samples.
    rtime_adj <- adjustRtimeSubset(rtime, rtime_adj, subset = subset,
                                   method = subsetAdjust)
    
    if (warn.overcorrect & !.SwapEnv$.optimize_switch) {
      warning("Fitted retention time deviation curves exceed points by more",
              " than 2x. This is dangerous and the algorithm is probably ",
              "overcorrecting your data. Consider increasing the span ",
              "parameter or switching to the linear smoothing method.")
    }
    
    if (warn.tweak.rt & !.SwapEnv$.optimize_switch) {
      warning(call. = FALSE, "Adjusted retention times had to be ",
                "re-adjusted for some files to ensure them being in the same",
                " order than the raw retention times. A call to ",
                "'dropAdjustedRtime' might thus fail to restore retention ",
                "times of chromatographic peaks to their original values. ",
                "Eventually consider to increase the value of the 'span' ",
                "parameter.")
    }
    rtime_adj
  }

mSet.obiwarp <- function(mSet, object, param) { ## Do not use the params defined by user for now!
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  param <- list();
  
  param$binSize <- 1;
  param$response <- 1;
  param$distFun <- "cor_opt";
  param$gapInit <- 0.3;
  param$gapExtend <- 2.4;
  param$factorDiag <- 2;
  param$factorGap <- 1;
  param$localAlignment <- FALSE;
  param$initPenalty <- 0;
  
  # 1.1). Subset selection
  
  Subset_QC <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]]=="QC");

  if (length(Subset_QC) < 30){
    Subset <- integer(0);
  } else {
    Subset <- Subset_QC;
  }
  
  subs <- Subset;
  
  if (!length(subs))
    subs <- seq_along(fileNames(object))
  
  total_samples <- length(fileNames(object))
  nSamples <- length(subs)
  
  if (nSamples <= 1)
    stop("Can not perform a retention time correction on less than two",
         " files.")
  
  centerSample <- floor(median(seq_len(nSamples)));
  
  if (.on.public.web() & !.optimize_switch){
    
    print_mes <- paste0("Sample number ", centerSample, " used as center sample.\n");    
    write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
    #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
    
  } else {
    MessageOutput(paste0("Sample number ", centerSample, " used as center sample.\n"), "", NULL)
  } 

  rtraw <- split(rtime(object), fromFile(object))
  object <- filterFile(object, file = subs)
  ## Get the profile matrix of the center sample:
  ## Using the (hidden) parameter returnBreaks to return also the breaks of
  ## the bins of the profile matrix. I can use them to align the matrices
  ## later.
  ## NOTE: it might be event better to just re-use the breaks from the center
  ## sample for the profile matrix generation of all following samples.
  suppressMessages(
    profCtr <- profMat(object, method = "bin",
                       step = param$binSize,
                       fileIndex = centerSample,
                       returnBreaks = TRUE)[[1]]
  )
  ## Now split the object by file
  objL <- splitByFile(object, f = factor(seq_len(nSamples)))
  objL <- objL[-centerSample]
  centerObject <- filterFile(object, file = centerSample)
  ## Now we can bplapply here!
  res <- lapply(objL, function(z, cntr, cntrPr, parms) {
    
    message("Aligning ", basename(fileNames(z)), " against ",
            basename(fileNames(cntr)), " ... ", appendLF = TRUE)
    
    ## Get the profile matrix for the current file.
    suppressMessages(
      curP <- profMat(z, method = "bin", step = parms$binSize,
                      returnBreaks = TRUE)[[1]]
    )
    
    ## ---------------------------------------
    ## 1)Check the scan times of both objects:
    scantime1 <- unname(rtime(cntr))
    scantime2 <- unname(rtime(z))
    ## median difference between spectras' scan time.
    mstdiff <- median(c(diff(scantime1), diff(scantime2)))

    mst1 <- which(diff(scantime1) > 5 * mstdiff)[1]
    if (!is.na(mst1)) {
      scantime1 <- scantime1[seq_len((mst1 - 1))]

      if (!.optimize_switch){
        MessageOutput(paste0("Found gaps in scan times of the center sample: cut ", "scantime-vector at ", scantime1[mst1]," seconds."),
                      "", NULL)
      } 

      if (.on.public.web() & !.optimize_switch){
        
        print_mes <- paste0("Found gaps in scan times of the center sample: cut ", "scantime-vector at ", scantime1[mst1]," seconds.");    
        write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
        #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
        
      } else {
        
        message("Found gaps in scan times of the center sample: cut ",
                "scantime-vector at ", scantime1[mst1]," seconds.")
      }
      
    }
    
    mst2 <- which(diff(scantime2) > 5 * mstdiff)[1]
    
    if(!is.na(mst2)) {
      scantime2 <- scantime2[seq_len((mst2 - 1))]

      if (!.optimize_switch){
        MessageOutput(paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", scantime2[mst2]," seconds."),
                      "", NULL)
      } 

      if (.on.public.web() & !.optimize_switch){
        
        print_mes <- paste0("Found gaps in scan time of file ", basename(fileNames(z)), ": cut scantime-vector at ", scantime2[mst2]," seconds.");    
        write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
        #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
        
      } else {
        
        message("Found gaps in scan time of file ", basename(fileNames(z)),
                ": cut scantime-vector at ", scantime2[mst2]," seconds.")
      }

    }
    ## Drift of measured scan times - expected to be largest at the end.
    rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)],
                            scantime2[length(scantime2)])))
    ## If the drift is larger than the threshold, cut the matrix up to the
    ## max allowed difference.
    if (rtmaxdiff > (5 * mstdiff)) {
      rtmax <- min(scantime1[length(scantime1)],
                   scantime2[length(scantime2)])
      scantime1 <- scantime1[scantime1 <= rtmax]
      scantime2 <- scantime2[scantime2 <= rtmax]
    }
    valscantime1 <- length(scantime1)
    valscantime2 <- length(scantime2)
    
    ## Ensure we have the same number of scans.
    if (valscantime1 != valscantime2) {
      min_number <- min(valscantime1, valscantime2)
      diffs <- abs(range(scantime1) - range(scantime2))
      ## Cut at the start or at the end, depending on where we have the
      ## larger difference
      if (diffs[2] > diffs[1]) {
        scantime1 <- scantime1[seq_len(min_number)]
        scantime2 <- scantime2[seq_len(min_number)]
      } else {
        scantime1 <- rev(rev(scantime1)[seq_len(min_number)])
        scantime2 <- rev(rev(scantime2)[seq_len(min_number)])
      }
      valscantime1 <- length(scantime1)
      valscantime2 <- length(scantime2)
    }
    
    ## Finally, restrict the profile matrix to the restricted data
    if (ncol(cntrPr$profMat) != valscantime1) {
      ## Find out whether we were cutting at the start or end.
      start_idx <- which(scantime1[1] == rtime(cntr))
      end_idx <- which(scantime1[length(scantime1)] == rtime(cntr))
      cntrPr$profMat <- cntrPr$profMat[, start_idx:end_idx]
    }
    if(ncol(curP$profMat) != valscantime2) {
      start_idx <- which(scantime2[1] == rtime(z))
      end_idx <- which(scantime2[length(scantime2)] == rtime(z))
      curP$profMat <- curP$profMat[, start_idx:end_idx]
    }
    
    ## ---------------------------------
    ## 2) Now match the breaks/mz range.
    ##    The -1 below is because the breaks define the upper and lower
    ##    boundary. Have to do it that way to be in line with the orignal
    ##    code... would be better to use the breaks as is.
    
    mzr1 <- c(cntrPr$breaks[1], cntrPr$breaks[length(cntrPr$breaks) - 1])
    mzr2 <- c(curP$breaks[1], curP$breaks[length(curP$breaks) - 1])
    mzmin <- min(c(mzr1[1], mzr2[1]))
    mzmax <- max(c(mzr1[2], mzr2[2]))
    mzs <- seq(mzmin, mzmax, by = parms$binSize)
    
    ## Eventually add empty rows at the beginning
    if (mzmin < mzr1[1]) {
      tmp <- matrix(0, (length(seq(mzmin, mzr1[1], parms$binSize)) - 1),
                    ncol = ncol(cntrPr$profMat))
      cntrPr$profMat <- rbind(tmp, cntrPr$profMat)
    }
    
    ## Eventually add empty rows at the end
    if (mzmax > mzr1[2]) {
      tmp <- matrix(0, (length(seq(mzr1[2], mzmax, parms$binSize)) - 1),
                    ncol = ncol(cntrPr$profMat))
      cntrPr$profMat <- rbind(cntrPr$profMat, tmp)
    }
    
    ## Eventually add empty rows at the beginning
    if (mzmin < mzr2[1]) {
      tmp <- matrix(0, (length(seq(mzmin, mzr2[1], parms$binSize)) - 1),
                    ncol = ncol(curP$profMat))
      curP$profMat <- rbind(tmp, curP$profMat)
    }
    
    ## Eventually add empty rows at the end
    if (mzmax > mzr2[2]) {
      tmp <- matrix(0, (length(seq(mzr2[2], mzmax, parms$binSize)) - 1),
                    ncol = ncol(curP$profMat))
      curP$profMat <- rbind(curP$profMat, tmp)
    }
    
    ## A final check of the data.
    mzvals <- length(mzs)
    cntrVals <- length(cntrPr$profMat)
    curVals <- length(curP$profMat)
    
    if ((mzvals * valscantime1) != cntrVals | (mzvals * valscantime2) != curVals)
      stop("Dimensions of profile matrices of files ",
           basename(fileNames(cntr)), " and ", basename(fileNames(z)),
           " do not match!")

    if (.on.public.web() & !.optimize_switch){
      
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
      
    }
    

    ## Done with preparatory stuff - now I can perform the alignment.
    rtadj <- R_set_obiwarpR(valscantime1, scantime1, mzvals, mzs, cntrPr, valscantime2, scantime2, curP, parms);


    if (.on.public.web() & !.optimize_switch){
      
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);
      
    }
 
    if (length(rtime(z)) != valscantime2) {
      nrt <- length(rtime(z))
      adj_starts_at <- which(rtime(z) == scantime2[1])
      adj_ends_at <- which(rtime(z) == scantime2[length(scantime2)])
      if (adj_ends_at < nrt)
        rtadj <- c(rtadj, rtadj[length(rtadj)] +
                     cumsum(diff(rtime(z)[adj_ends_at:nrt])))
      if (adj_starts_at > 1)
        rtadj <- c(rtadj[1] +
                     rev(cumsum(diff(rtime(z)[adj_starts_at:1]))), rtadj)
    }
    
    MessageOutput("OK", "\n", NULL)
    
    if (.on.public.web() & !.optimize_switch){

      MessageOutput(paste0("Aligning ", basename(fileNames(z)), " against ", basename(fileNames(cntr)), " ... OK\n"), 
                    "", 
                    NULL)

      #print_mes <- paste0("Aligning ", basename(fileNames(z)), " against ", basename(fileNames(cntr)), " ... OK\n");    
      #write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
      #write.table(66.0, file = paste0(fullUserPath, "log_progress.txt"),row.names = FALSE,col.names = FALSE);

    }
    
    return(unname(rtadj))
    
  }, cntr = centerObject, cntrPr = profCtr, parms = param)
  
  ## Create result
  adjRt <- vector("list", total_samples)
  adjRt[subs[centerSample]] <- list(unname(rtime(centerObject)))
  adjRt[subs[-centerSample]] <- res
  adjustRtimeSubset(rtraw, adjRt, subset = subs, method = "average")
  
}

#' @title PerformPeakFiling
#' @description PerformPeakFiling
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam().
#' @export
#' @return will return an mSet object with peak gaps filled
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' SetGlobalParallel(1);
#' mSet <- PerformPeakFiling(mSet);
#' register(bpstop());
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

PerformPeakFiling <- function(mSet, BPPARAM=bpparam()){
  
  if(missing(mSet) & file.exists("mSet.rda")){
    load("mSet.rda")
  } else if(missing(mSet) & !file.exists("mSet.rda")){
    if(.on.public.web()){
      MessageOutput("ERROR: mSet is missing !", NULL, NULL);
      stop();
    } else {
      stop("mSet is missing ! Please make sure mSet is well defined !");
    }
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  param <- mSet@params;
  
  if(length(mSet@rawOnDisk) == 0 & length(mSet@rawInMemory) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No MS data imported, please import the MS data with 'ImportRawMSData' first !", NULL, NULL);
      stop();
    } else {
      stop("No MS data Imported, please import the MS data with 'ImportRawMSData' first !");
    }
  }
  
  if(length(mSet@peakRTcorrection) == 0){
    if(.on.public.web()){
      MessageOutput("ERROR: No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
      stop();
    } else {
      stop("No Retention Time Correction results found. Please \"PerformRTcorrection\" or \"PerformPeakAlignment\" first !");
    }
  }  
  ## Preparing Something
  if (!.optimize_switch) {
    MessageOutput(paste("Starting peak filling!"), ecol = "\n", 74)
  } 
  
  fixedRt <- fixedMz <- expandRt <- expandMz <- 0
  
  if (is.null(param$ppm)){
    warning("No ppm detected, will use ppm = 10 instead for peak filling !")
    ppm <-10
  } else {
    ppm <- param$ppm;
  }
  
  if (!.optimize_switch){
    MessageOutput(paste("Defining peak areas for filling-in."), ecol = "", NULL)
  } 
  
  aggFunLow <- median
  aggFunHigh <- median;

  ngroup <- length(mSet@peakgrouping);
  #tmp_pks <- mSet$msFeatureData$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
  tmp_pks <- mSet@peakRTcorrection$chromPeaks[, c("rtmin", "rtmax", "mzmin", "mzmax")];
  ### complete peak table should be kept here
  fdef <- mSet@peakgrouping[[ngroup]];
  
  pkArea <- do.call(rbind,
                    lapply(fdef$peakidx, function(z) {
                      pa <- c(aggFunLow(tmp_pks[z, 1]),
                              aggFunHigh(tmp_pks[z, 2]),
                              aggFunLow(tmp_pks[z, 3]),
                              aggFunHigh(tmp_pks[z, 4]))
    ## Check if we have to apply ppm replacement:
    if (ppm != 0) {
      mzmean <- mean(pa[3:4])
      tittle <- mzmean * (ppm / 2) / 1E6
      if ((pa[4] - pa[3]) < (tittle * 2)) {
        pa[3] <- mzmean - tittle
        pa[4] <- mzmean + tittle
      }
    }
    ## Expand it.
    if (expandRt != 0) {
      diffRt <- (pa[2] - pa[1]) * expandRt / 2
      pa[1] <- pa[1] - diffRt
      pa[2] <- pa[2] + diffRt
    }
    if (expandMz != 0) {
      diffMz <- (pa[4] - pa[3]) * expandMz / 2
      pa[3] <- pa[3] - diffMz
      pa[4] <- pa[4] + diffMz
    }
    if (fixedMz != 0) {
      pa[3] <- pa[3] - fixedMz
      pa[4] <- pa[4] + fixedMz
    }
    if (fixedRt != 0) {
      pa[1] <- pa[1] - fixedRt
      pa[2] <- pa[2] + fixedRt
    }
    pa
  }))
  
  rm(tmp_pks)
  
  if (!.optimize_switch){
    MessageOutput(".", ecol = "", 75)
  }
  
  colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
  ## Add mzmed column - needed for MSW peak filling.
  pkArea <- cbind(group_idx = seq_len(nrow(pkArea)), pkArea,
                  mzmed = as.numeric(fdef$mzmed))
  
  if (length(mSet@rawOnDisk) != 0){
    
    if(is.null(param$BlankSub)){
      param$BlankSub <- FALSE;
    }
    if(param$BlankSub){
      specdata0 <- mSet@rawOnDisk;
      blk2rms <- which(specdata0@phenoData@data[["sample_group"]] == "BLANK");
      fdnew <- specdata0@featureData
      fdnew@data <- fdnew@data[!(fdnew@data[["fileIdx"]] %in% blk2rms),];
      #fdnew@data[["fileIdx"]] <- fdnew@data[["fileIdx"]] - length(blk2rms)
      ii <- 1;
      for(fii in unique(fdnew@data[["fileIdx"]])){
        fdnew@data[["fileIdx"]][fdnew@data[["fileIdx"]] == fii] <- ii;
        ii <- ii + 1
      }
      
      pdnew <- specdata0@phenoData;
      pdnew@data <- pdnew@data[-blk2rms,];
      numfiles <- length(specdata0@experimentData@instrumentModel) - length(blk2rms);
      
      specdata <- new(
        "OnDiskMSnExp",
        processingData = new("MSnProcess",
                             files = specdata0@processingData@files[-blk2rms]),
        featureData = fdnew,
        phenoData = pdnew,
        experimentData = new("MIAPE",
                             instrumentManufacturer = rep("a",numfiles),
                             instrumentModel = rep("a",numfiles),
                             ionSource = rep("a",numfiles),
                             analyser = rep("a",numfiles),
                             detectorType = rep("a",numfiles)))
    } else {
      specdata <- mSet@rawOnDisk;
    }
  } else {
    specdata <- mSet@rawInMemory;
  }
  
  fNames <- specdata@phenoData@data[["sample_name"]]
  #pks <- mSet$msFeatureData$chromPeaks
  pks <- mSet@peakRTcorrection[["chromPeaks"]]
  
  # if(!is.null(param$BlankSub)){
  #   if(param$BlankSub){
  #     fNames <- fNames[specdata@phenoData@data$sample_group != "BLANK"]
  #     #pks <- pks[pks[,"sample"] !=0,]
  #   }
  # }
  
  pkGrpVal <-
    .feature_values(pks = pks, fts = mSet@peakgrouping[[ngroup]],
                    method = "medret", value = "index",
                    intensity = "into", colnames = fNames,
                    missing = NA)
  
  if (!.optimize_switch){
    MessageOutput(".","",76)
  }
  
  ## Check if there is anything to fill...
  if (!any(is.na(rowSums(pkGrpVal)))) {
    if (!.optimize_switch){
      MessageOutput("\nNo missing peaks present.","\n",76)
    }
    # mSet@peakfilling <-""; # need to save something here
    mSet@peakfilling[["msFeatureData"]][["chromPeaks"]] <- 
      mSet@peakRTcorrection$chromPeaks;
    
    mSet@peakfilling[["msFeatureData"]][["adjustedRT"]] <- 
      mSet@peakRTcorrection$adjustedRT;
    
    mSet@peakfilling[["msFeatureData"]][["chromPeakData"]] <- 
      mSet@peakpicking[["chromPeakData"]];
    
    mSet@peakfilling[["FeatureGroupTable"]] <-
      mSet@peakgrouping[[length(mSet@peakgrouping)]];
    
    if(.on.public.web() & !.optimize_switch){
      save(mSet, file = "mSet.rda");
    }
    
    return(mSet)
  }
  
  if (!.optimize_switch){
    MessageOutput(".","",76)
  }
  
  ## Split the object by file and define the peaks for which
  pkAreaL <- objectL <- vector("list", length(fNames))
  
  ## We need "only" a list of OnDiskMSnExp, one for each file but
  ## instead of filtering by file we create small objects to keep
  ## memory requirement to a minimum.
  if (is(specdata,"OnDiskMSnExp")){
    
    req_fcol <- requiredFvarLabels("OnDiskMSnExp")
    min_fdata <- specdata@featureData@data[, req_fcol]
    
    ###
    res <- mSet@peakRTcorrection$adjustedRT
    res <- unlist(res, use.names = FALSE)
    
    fidx <- fData(specdata)$fileIdx # a issue for inmemory data
    names(fidx) <- featureNames(specdata)
    sNames <- unlist(split(featureNames(specdata), fidx),
                     use.names = FALSE)
    
    names(res) <- sNames
    res <- res[featureNames(specdata)]
    
    min_fdata$retentionTime <- res
    
    for (i in seq_along(specdata@phenoData@data[["sample_name"]])) {
      
      fd <- min_fdata[min_fdata$fileIdx == i, ]
      fd$fileIdx <- 1
      
      objectL[[i]] <- new(
        "OnDiskMSnExp",
        processingData = new("MSnProcess",
                             files = specdata@processingData@files[i]),
        featureData = new("AnnotatedDataFrame", fd),
        phenoData = new("NAnnotatedDataFrame",
                        data.frame(sampleNames = "1")),
        experimentData = new("MIAPE",
                             instrumentManufacturer = "a",
                             instrumentModel = "a",
                             ionSource = "a",
                             analyser = "a",
                             detectorType = "a"))
      
      ## Want to extract intensities only for peaks that were not
      ## found in a sample.
      
      pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE]
    }
    
    if (!.optimize_switch){
      MessageOutput(paste("OK\nStart integrating peak areas from original files..."), "\n", 77)
    } 
    
    cp_colnames <- colnames(mSet@peakRTcorrection[["chromPeaks"]])
    
    ## Extraction designed for centWave
    res <- bpmapply(FUN = .getChromPeakData, 
                    objectL,
                    pkAreaL, 
                    as.list(seq_along(objectL)),
                    MoreArgs = list(cn = cp_colnames,
                                    mzCenterFun = "weighted.mean"),
                    BPPARAM = BPPARAM, 
                    SIMPLIFY = FALSE)
    
    register(bpstop());
    
  } else {

    mzCenterFun = "weighted.mean";
    scan_set<-names(specdata@assayData);
    scan_set_ordered <- sort(scan_set);
    
    assayData <- sapply(scan_set_ordered, FUN = function(x){specdata@assayData[[x]]})
    
    #assayData <- lapply(scan_set,FUN=function(x){mSet[[format]]@assayData[[x]]});
    assayData <- split(assayData,fromFile(specdata));
    
    #rtim_all <- split(rtime(specdata),fromFile(specdata)) has to used the corrected RT
    rtim_all <- mSet@peakRTcorrection$adjustedRT;
    filesname <- basename(MSnbase::fileNames(specdata));
    res_new <-list();
    
    for (ii in seq_along(specdata@phenoData@data[["sample_name"]])) {
      
      cn <-colnames(mSet@peakRTcorrection$chromPeaks);
      peakArea <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
      
      ncols <- length(cn);
      res <- matrix(ncol = ncols, nrow = nrow(peakArea));
      colnames(res) <- cn;
      
      res[, "sample"] <- ii;
      res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")];
      ## Load the data

      if (!.optimize_switch){
        MessageOutput(mes = paste0("Requesting ", nrow(res), " peaks from ", filesname[ii], " ... "), "", NULL)          
      } 
      
      spctr <- assayData[[ii]];
      mzs <- lapply(spctr, mz);
      valsPerSpect <- lengths(mzs);
      ints <- unlist(lapply(spctr, intensity), use.names = FALSE);
      rm(spctr);
      
      mzs <- unlist(mzs, use.names = FALSE)
      mzs_range <- range(mzs)
      rtim <- rtim_all[[ii]]
      rtim_range <- range(rtim)
      
      for (i in seq_len(nrow(res))) {
        rtr <- peakArea[i, c("rtmin", "rtmax")]
        mzr <- peakArea[i, c("mzmin", "mzmax")]
        ## If the rt range is completely out; additional fix for #267
        if (rtr[2] < rtim_range[1] | rtr[1] > rtim_range[2] |
            mzr[2] < mzs_range[1] | mzr[1] > mzs_range[2]) {
          res[i, ] <- rep(NA_real_, ncols)
          next
        }
        ## Ensure that the mz and rt region is within the range of the data.
        rtr[1] <- max(rtr[1], rtim_range[1])
        rtr[2] <- min(rtr[2], rtim_range[2])
        mzr[1] <- max(mzr[1], mzs_range[1])
        mzr[2] <- min(mzr[2], mzs_range[2])
        mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim,
                       valsPerSpect = valsPerSpect, rtrange = rtr,
                       mzrange = mzr)
        
        if (length(mtx)) {
          if (any(!is.na(mtx[, 3]))) {
            res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) *
              ((rtr[2] - rtr[1]) /
                 max(1, (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)))
            maxi <- which.max(mtx[, 3])
            res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)]
            res[i, c("rtmin", "rtmax")] <- rtr
            ## Calculate the intensity weighted mean mz
            meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3]))
            if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2]
            res[i, "mz"] <- meanMz
          } else {
            res[i, ] <- rep(NA_real_, ncols)
          }
        } else {
          res[i, ] <- rep(NA_real_, ncols)
        }
      }
      
      if (!.optimize_switch) {
        MessageOutput(
          paste0("got ", sum(!is.na(res[, "into"])), "."),
          "\n",
          77 + ii / length(specdata@phenoData@data[["sample_name"]]) * 10
        )
      }
      res_new[[ii]] <- res;
      pkAreaL[[ii]] <- pkArea[is.na(pkGrpVal[, ii]), , drop = FALSE];
    }
    
    res <-res_new;
  }
  
  res <- do.call(rbind, res)
  ## cbind the group_idx column to track the feature/peak group.
  res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"])
  ## Remove those without a signal
  res <- res[!is.na(res[, "into"]), , drop = FALSE]
  
  if (nrow(res) == 0) {
    if(!.optimize_switch){
      warning("Could not integrate any signal for the missing ",
              "peaks! Consider increasing 'expandMz' and 'expandRt'.");
    }
    
    if(.on.public.web() & !.optimize_switch){
      save(mSet, file = "mSet.rda");
    }
    
    return(mSet)
  }
  
  ## Get the msFeatureData:
  newFd <- mSet@peakRTcorrection
  
  incr <- nrow(mSet@peakRTcorrection[["chromPeaks"]])
  for (i in unique(res[, "group_idx"])) {
    fdef$peakidx[[i]] <- c(fdef$peakidx[[i]],
                           (which(res[, "group_idx"] == i) + incr))
  }
  ## Define IDs for the new peaks; include fix for issue #347
  maxId <- max(as.numeric(sub("^CP", "",
                              rownames(mSet@peakRTcorrection[["chromPeaks"]]))))
  if (maxId < 1)
    stop("chromPeaks matrix lacks rownames; please update ",
         "'object' with the 'updateObject' function.")
  toId <- maxId + nrow(res)
  
  rownames(res) <- sprintf(
    paste0("CP", "%0", ceiling(log10(toId + 1L)), "d"),
    (maxId + 1L):toId)
  
  newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]],
                                 res[, -ncol(res)])
  
  cpd <- mSet@peakpicking[["chromPeakData"]][rep(1L, nrow(res)), , drop = FALSE]
  cpd[,] <- NA
  cpd$ms_level <- as.integer(1)
  cpd$is_filled <- TRUE
  if(length(cpd$Bank2rm) > 0){
    cpd$Bank2rm <- FALSE
  }
  
  if(param$BlankSub){
    incluVec <- mSet@peakpicking[["chromPeakData"]]@listData[["Bank2rm"]];
    newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]][!incluVec,], cpd);
    # newFd[["chromPeaks"]] <- rbind(mSet@peakRTcorrection[["chromPeaks"]][!incluVec,],
    #                                res[, -ncol(res)]);
    # also need to correct group peak idx
    # rnms <- row.names(newFd[["chromPeaks"]]);
    # oldidx <- as.numeric(gsub("^CP", "",rnms));
    # newidx <- c(1:length(oldidx));
    # fdef@listData[["peakidx"]] <- lapply(fdef@listData[["peakidx"]], function(x){
    #   newidx[match(x, oldidx)]
    # })
  } else {
    newFd$chromPeakData <- rbind(mSet@peakpicking[["chromPeakData"]], cpd)
  }
  
  rownames(newFd$chromPeakData) <- rownames(newFd$chromPeaks);
  
  mSet@peakfilling$msFeatureData <- newFd;
  mSet@peakfilling$FeatureGroupTable <- fdef;

  if(.on.public.web() & !.optimize_switch){
    save(mSet, file = "mSet.rda");
  }
  
  register(bpstop());
  
  return(mSet)
}

#' @title PerformBlankSubstraction
#' @description PerformBlankSubstraction
#' @param mSet the mSet object generated by PerformPeakPicking function.
#' @param BPPARAM parallel method used for data processing. Default is bpparam().
#' @export
#' @noRd
#' @return will return an mSet object with peak gaps filled
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
PerformBlankSubstraction <- function(mSet){
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  
  if(!mSet@params$BlankSub){
    if(!.optimize_switch){
      MessageOutput("Blank substraction will not be performed!", ecol = "\n")
    }
    return(mSet)
  }
  
  if(!is(mSet@peakgrouping[[1]], "DFrame")){
    stop("mSet object doesnot include peak grouping info! Please 'PerformPeakGrouping' first!")
  }
  dm <- mSet@peakgrouping[[1]]@listData
  dm$peakidx <- NULL;
  if(is.null(dm$BLANK)){
    mSet@params[["BlankSub"]] <- FALSE;
    MessageOutput("No blank sample included based on grouping info. Skipped!", ecol = "\n")
    return(mSet)
  }
  chromPeaks <- as.data.frame(mSet@peakpicking[["chromPeaks"]]);
  peakidx <- mSet@peakgrouping[[1]]@listData[["peakidx"]];
  BlkPeak <- which(dm$BLANK != 0);
  BlkSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] == "BLANK");
  RegSample <- which(mSet@rawOnDisk@phenoData@data[["sample_group"]] != "BLANK");
  res <- sapply(BlkPeak, FUN = function(x) {
    idxs <- peakidx[[x]];
    feattb <- chromPeaks[idxs,]; # feature table - feattb
    blkinto <- mean(feattb$into[feattb$sample %in% BlkSample])
    smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)])
    if(is.nan(smlinto)){return(NA)}
    smlinto/blkinto
  })
  ## Estimating the threshold for filtering
  sort_res <- sort(res, decreasing = T)
  infectPoint <- bede(x= c(1:length(sort_res)), 
       y = sort_res, 1)$iplast
  if(!is.nan(infectPoint)){
    blk_thresh <- round(sort_res[ceiling(infectPoint)],1)/5
  } else {
    blk_thresh <- round(sort_res[length(sort_res)*0.2]) # Keep top 20% FC (sample/blank) peaks
  }
  if(!is.numeric(blk_thresh)){
    MessageOutput("No threshold can be evaluted for blank substraction, will use 10 !", ecol = "\n")
    blk_thresh <- 10;
  } else {
    MessageOutput(paste0("Threshold for blank filtration has been estimated as ", blk_thresh, "..."), ecol = "\n")
  }
  
  ## Generete Inclusion list --> T is keep, F is remove
  InclusionVec <- vapply(peakidx, function(x){
    feattb <- as.data.frame(chromPeaks[x,]); # feature table - feattb
    blkinto <- mean(feattb$into[feattb$sample %in% BlkSample]);
    smlinto <- mean(feattb$into[!(feattb$sample %in% BlkSample)]);
    if(is.nan(smlinto)){
      return(FALSE)
    } else if (is.nan(blkinto)) {
      return(TRUE)
    } else if(smlinto/blkinto > blk_thresh){
      return(TRUE)
    } else {
      return(FALSE)
    }
  }, FUN.VALUE = vector(length = 1))
  
  ## Exclude blank features
  GroupingListData <- mSet@peakgrouping[[1]]@listData;
  GroupingListData_new <- lapply(GroupingListData, FUN = function(x){
    x[InclusionVec];
  })
  mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm <- 
    vector(mode = "logical", 
           length = nrow(mSet@peakpicking[["chromPeakData"]]));
  chrmpks2rm <- unlist(GroupingListData[["peakidx"]][!InclusionVec])
  mSet@peakpicking[["chromPeakData"]]@listData$Bank2rm[chrmpks2rm] <- TRUE;
  
  leftBlks <- GroupingListData_new[["BLANK"]];
  GroupingListData_new[["BLANK"]] <- NULL;
  # Clean peakidx List
  blankChromPeakIdx <- which(chromPeaks$sample %in% BlkSample)
  peakidx_new <- lapply(GroupingListData_new[["peakidx"]], function(x){
    x[!(x %in% blankChromPeakIdx)]
  })
  GroupingListData_new[["peakidx"]] <- peakidx_new;
  
  mSet@peakgrouping[[1]]@listData <- GroupingListData_new;
  numFeat <- length(which(InclusionVec));
  mSet@peakgrouping[[1]]@nrows <- numFeat
  rownames(mSet@peakgrouping[[1]]) <- 
    sprintf(paste0("FT", "%0", ceiling(log10(numFeat + 1L)), "d"),
            seq(from = 1, length.out = numFeat))
  MessageOutput("Blank features have been substracted successfully!", ecol = "\n")

  return(mSet)
}


#' @title mSet2xcmsSet
#' @description mSet2xcmsSet
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787

mSet2xcmsSet <- function(mSet) {
  
  xs <- new("xcmsSet")
  #xs@peaks <- mSet[["msFeatureData"]][["chromPeaks"]]
  
  xs@peaks <- mSet@peakRTcorrection$chromPeaks;
  #fgs <- mSet$FeatureGroupTable
  fgs <- mSet@peakfilling$FeatureGroupTable
  xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)])
  rownames(xs@groups) <- NULL
  xs@groupidx <- fgs$peakidx
  
  rts <- list()
  
  if (is(mSet[[2]],"OnDiskMSnExp")){
    format <- "onDiskData"
  } else {
    format <- "inMemoryData"
  }
  
  ## Ensure we're getting the raw rt
  rts$raw <- rtime(mSet[[format]])
  rts$corrected <- mSet[["msFeatureData"]][["adjustedRT"]]
  
  xs@rt <- rts
  
  ## @phenoData
  xs@phenoData <- pData(mSet[[format]])
  ## @filepaths
  xs@filepaths <- fileNames(mSet[[format]])
  
  ## @profinfo (list)
  profMethod <- NA
  profStep <- NA
  profParam <- list()
  ## If we've got any MatchedFilterParam we can take the values from there
  
  ## @mslevel <- msLevel?
  xs@mslevel <- 1
  
  ## @scanrange
  xs@scanrange <- range(scanIndex(mSet[[format]]))
  
  ## @filled ... not yet.
  if (any(mSet$msFeatureData$chromPeakData$is_filled)) {
    fld <- which(mSet$msFeatureData$chromPeakData$is_filled)
    xs@filled <- as.integer(fld)
  }
  
  return(xs)
}

#' @title updateRawSpectraParam
#' @description updateRawSpectraParam
#' @param Params object generated by SetPeakParams function.
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' @noRd
#' @references Smith, C.A. et al. 2006. {Analytical Chemistry}, 78, 779-787
#' Mcgill University
#' License: GNU GPL (>= 2)

updateRawSpectraParam <- function (Params){
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  param <- list();
  
  # 0. Method Define
  param$Peak_method <- Params$Peak_method
  param$RT_method <- Params$RT_method
  
  # 1.1 update peak picking params - centWave
  if (param$Peak_method == "centWave"){
    
    param$ppm <- as.numeric(Params[["ppm"]]);
    param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
                         as.numeric(Params[["max_peakwidth"]]));
    if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
      save(Params, file="Params_error.rda");
      stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
    };
    
    param$snthresh <- as.numeric(Params[["snthresh"]]);
    param$prefilter <- c(as.numeric(Params[["prefilter"]]), 
                         as.numeric(Params[["value_of_prefilter"]]));
    param$roiList <- list();
    param$firstBaselineCheck <- TRUE;
    param$roiScales <- numeric(0);
    
    param$mzCenterFun <- Params[["mzCenterFun"]];
    param$integrate <- Params[["integrate"]];
    param$mzdiff <- as.numeric(Params[["mzdiff"]]);
    param$fitgauss <- as.logical(Params[["fitgauss"]]);
    param$noise <- as.numeric(Params[["noise"]]);
    param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
    
    param$binSize <-0.25; # density Param
    
  } else if (param$Peak_method == "matchedFilter") {
    
    param$fwhm <- as.numeric(Params$fwhm);
    param$sigma <- as.numeric(Params$sigma);
    param$steps <- as.numeric(Params$steps);
    param$snthresh <- as.numeric(Params$snthresh);
    param$peakBinSize <- as.numeric(Params$peakBinSize);
    
    param$mzdiff <- as.numeric(Params$mzdiff)
    param$bw <- as.numeric(Params$bw)
    param$max <- as.numeric(Params$max)
    
    param$impute <- "none";
    param$baseValue<- numeric(0);
    param$distance<- numeric(0);
    param$index<- FALSE;
    
    param$binSize <-0.25; # density Param
    
  } else if (param$Peak_method == "Massifquant") {
    
    param$ppm <- as.numeric(Params[["ppm"]]);
    param$peakwidth <- c(as.numeric(Params[["min_peakwidth"]]),
                         as.numeric(Params[["max_peakwidth"]]));
    if (param[["peakwidth"]][1] > param[["peakwidth"]][2]){
      stop("min_peakwidth has to be less than max_peakwidth ! Please adjust...");
    };
    
    param$snthresh <- as.numeric(Params[["snthresh"]]);
    param$prefilter <- c(as.numeric(Params[["prefilter"]]), 
                         as.numeric(Params[["value_of_prefilter"]]));
    param$roiList <- list();
    param$firstBaselineCheck <- TRUE;
    param$roiScales <- numeric(0);
    
    param$mzCenterFun <- Params[["mzCenterFun"]];
    param$integrate <- Params[["integrate"]];
    param$mzdiff <- as.numeric(Params[["mzdiff"]]);
    param$fitgauss <- as.logical(Params[["fitgauss"]]);
    param$noise <- as.numeric(Params[["noise"]]);
    param$verboseColumns <- as.logical(Params[["verbose.columns"]]);
    
    param$binSize <-0.25; # density Param
    
    # Specific Parameters
    param$criticalValue <- as.numeric(Params[["criticalValue"]]);
    param$consecMissedLimit <- as.numeric(Params[["consecMissedLimit"]]);
    param$unions <- as.numeric(Params[["unions"]]);
    param$checkBack <- as.numeric(Params[["checkBack"]]);
    param$withWave <- as.logical(Params[["withWave"]]);
    
  }
  # 2.1 update grouping params - density
  
  param$bw<-as.numeric(Params[["bw"]]);
  param$minFraction <- as.numeric(Params[["minFraction"]]);
  param$minSamples <- as.numeric(Params[["minSamples"]]);
  param$maxFeatures <- as.numeric(Params[["maxFeatures"]]);
  
  if (param[["minFraction"]][1] > 1.0) {
    stop("minFraction can not be larger than 1.0 ! Please adjust ...");
  };  
  
  # 3.1 update RT correction params - peakgroup
  
  param$extraPeaks <- as.numeric(Params[["extra"]]);
  
  param$smooth <- Params[["smooth"]];
  param$span <- as.numeric(Params[["span"]]);
  param$family <- Params[["family"]];
  
  param$subsetAdjust <- "average";
  
  # 4. Swith of Blank substraction
  param$BlankSub <- Params$BlankSub; # testing...
  
  # Finished !
  .optimize_switch <- .SwapEnv$.optimize_switch;
  if(is.null(.optimize_switch)){
    .optimize_switch <- FALSE;
  }
  if (.on.public.web() & !.optimize_switch){
    MessageOutput(paste("Parameters for",param$Peak_method, "have been successfully parsed!"), "\n", NULL);
  }
  
  return(param)
}

#' @title creatPeakTable
#' @description creatPeakTable
#' @param mSet mSet object, usually generated by 'PerformPeakAnnotation' here.
#' @noRd
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada
#' License: GNU GPL (>= 2)

creatPeakTable <- function(mSet){
  
  if (length(mSet@rawInMemory@phenoData[["sample_name"]]) == 1) {
    return(mSet@peakfilling$msFeatureData$chromPeaks)
  }
  
  fgs <- mSet@peakfilling$FeatureGroupTable;
  groupmat <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]);
  rownames(groupmat) <- NULL;
  
  ts <- data.frame(cbind(groupmat,.groupval(mSet, value="into")), row.names = NULL)
  
  cnames <- colnames(ts)
  
  if (cnames[1] == 'mzmed') {
    cnames[1] <- 'mz'
  } else {
    stop ('mzmed column missing')
  }
  if (cnames[4] == 'rtmed') {
    cnames[4] <- 'rt'
  } else {
    stop ('mzmed column missing')
  }
  
  colnames(ts) <- cnames
  
  return(ts)
}

#' @title updateRawSpectraPath
#' @description updateRawSpectraPath
#' @param mSet mSet object generated by ImportRawMSData or the following functions.
#' @param newPath Character vector, a character vector specify the absolute path of the new raw spectra files.
#' @export
#' @return will return an mSet object with raw files' path updated
#' @examples 
#' data(mSet);
#' newPath <- dir(system.file("mzData", package = "mtbls2"),
#'                full.names = TRUE, recursive = TRUE)[c(10, 11, 12)]
#' mSet <- updateRawSpectraPath(mSet, newPath);
#' @author Zhiqiang Pang, Jeff Xia \email{jeff.xia@mcgill.ca}
#' McGill University, Canada

updateRawSpectraPath <- function(mSet, newPath){

  oldPathLength <- length(mSet@rawOnDisk@processingData@files);
  newPathLength <- length(newPath);
  
  if(oldPathLength != newPathLength){
    stop("newPath define a wrong path character because the files number is incorrect !")
  }
  
  if(!all(sapply(newPath, file.exists))){
    stop("Not all new files exits, please check !")
  }
  
  mSet@rawOnDisk@processingData@files <- newPath;
  return(mSet);
  
}


########         -----------=========== Internal Functions From XCMS ===========-------------        #
# 
#' @references Smith, C.A., Want, E.J., O'Maille, G., Abagyan,R., Siuzdak, G. (2006). 
#               "XCMS: Processing mass spectrometry data for metabolite profiling using nonlinear 
#                 peak alignment, matching and identification." Analytical Chemistry, 78, 779-787.

adjustRtimeSubset <- function(rtraw, rtadj, subset,
                              method = c("average", "previous")) {
  method <- match.arg(method)
  if (length(rtraw) != length(rtadj))
    stop("Lengths of 'rtraw' and 'rtadj' have to match.")
  if (missing(subset))
    subset <- seq_along(rtraw)
  if (!all(subset %in% seq_along(rtraw)))
    stop("'subset' is out of bounds.")

  no_subset <- seq_len(length(rtraw))[-subset]
  for (i in no_subset) {
    message("Aligning sample number ", i, " against subset ... ",
            appendLF = TRUE)
    if (method == "previous") {
      i_adj <- .get_closest_index(i, subset, method = "previous")
      rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rtraw[[i_adj]],
                                       rtadj[[i_adj]])
    }
    if (method == "average") {
      i_ref <- c(.get_closest_index(i, subset, method = "previous"),
                 .get_closest_index(i, subset, method = "next"))
      trim_idx <- .match_trim_vector_index(rtraw[i_ref])
      rt_raw_ref <- do.call(
        cbind, .match_trim_vectors(rtraw[i_ref], idxs = trim_idx))
      rt_adj_ref <- do.call(
        cbind, .match_trim_vectors(rtadj[i_ref], idxs = trim_idx))
      wghts <- 1 / abs(i_ref - i) # weights depending on distance to i
      rt_raw_ref <- apply(rt_raw_ref, 1, weighted.mean, w = wghts)
      rt_adj_ref <- apply(rt_adj_ref, 1, weighted.mean, w = wghts)
      rtadj[[i]] <- .applyRtAdjustment(rtraw[[i]], rt_raw_ref,
                                       rt_adj_ref)
    }
    message("OK")
  }
  rtadj
}

OptiChromPeaks <- function(pks, bySample = FALSE,
                           rt = numeric(), mz = numeric(),
                           ppm = 0, msLevel = integer(),
                           type = c("any", "within",
                                    "apex_within"),
                           isFilledColumn = FALSE) {
  type <- match.arg(type)
  # if (isFilledColumn)
  #   pks <- cbind(pks, is_filled = as.numeric(chromPeakData(object)$is_filled))
  # if (length(msLevel))
  #   pks <- pks[which(chromPeakData(object)$ms_level %in% msLevel), ,
  #              drop = FALSE]
  ## Select peaks within rt range.
  if (length(rt)) {
    rt <- range(rt)
    if (type == "any")
      keep <- which(pks[, "rtmin"] <= rt[2] & pks[, "rtmax"] >= rt[1])
    if (type == "within")
      keep <- which(pks[, "rtmin"] >= rt[1] & pks[, "rtmax"] <= rt[2])
    if (type == "apex_within")
      keep <- which(pks[, "rt"] >= rt[1] & pks[, "rt"] <= rt[2])
    pks <- pks[keep, , drop = FALSE]
  }
  ## Select peaks within mz range, considering also ppm
  if (length(mz) && length(pks)) {
    mz <- range(mz)
    ## Increase mz by ppm.
    if (is.finite(mz[1]))
      mz[1] <- mz[1] - mz[1] * ppm / 1e6
    if (is.finite(mz[2]))
      mz[2] <- mz[2] + mz[2] * ppm / 1e6
    if (type == "any")
      keep <- which(pks[, "mzmin"] <= mz[2] & pks[, "mzmax"] >= mz[1])
    if (type == "within")
      keep <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2])
    if (type == "apex_within")
      keep <- which(pks[, "mz"] >= mz[1] & pks[, "mz"] <= mz[2])
    pks <- pks[keep, , drop = FALSE]
  }
  
  pks
}
getLocalNoiseEstimate <- function(d, td, ftd, noiserange, Nscantime, threshold, num) {
  
  if (length(d) < Nscantime) {
    
    ## noiserange[2] is full d-range
    drange <- which(td %in% ftd)
    n1 <- d[-drange] ## region outside the detected ROI (wide)
    n1.cp <- continuousPtsAboveThresholdIdxR(n1, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n1 <- n1[!n1.cp]
    if (length(n1) > 1)  {
      baseline1 <- mean(n1)
      sdnoise1 <- sd(n1)
    } else
      baseline1 <- sdnoise1 <- 1
    
    ## noiserange[1]
    d1 <- drange[1]
    d2 <- drange[length(drange)]
    nrange2 <- c(max(1,d1 - noiserange[1]) : d1, d2 : min(length(d),d2 + noiserange[1]))
    n2 <- d[nrange2] ## region outside the detected ROI (narrow)
    n2.cp <- continuousPtsAboveThresholdIdxR(n2, threshold=threshold,num=num) ## continousPtsAboveThreshold (probably peak) are subtracted from data for local noise estimation
    n2 <- n2[!n2.cp]
    if (length(n2) > 1)  {
      baseline2 <- mean(n2)
      sdnoise2 <- sd(n2)
    } else
      baseline2 <- sdnoise2 <- 1
    
  } else {
    trimmed <- trimm(d,c(0.05,0.95))
    baseline1 <- baseline2 <- mean(trimmed)
    sdnoise1 <- sdnoise2 <- sd(trimmed)
  }
  
  c(min(baseline1,baseline2),min(sdnoise1,sdnoise2))
}

#' @importFrom stats convolve
MSW.cwt <- function (ms, scales = 1, wavelet = "mexh") { ## modified from package MassSpecWavelet
  if (wavelet == "mexh") {
    psi_xval <- seq(-6, 6, length = 256)
    psi <- (2/sqrt(3) * pi^(-0.25)) * (1 - psi_xval^2) *
      exp(-psi_xval^2/2)
  }
  else if (is.matrix(wavelet)) {
    if (nrow(wavelet) == 2) {
      psi_xval <- wavelet[1, ]
      psi <- wavelet[2, ]
    }
    else if (ncol(wavelet) == 2) {
      psi_xval <- wavelet[, 1]
      psi <- wavelet[, 2]
    }
    else {
      stop("Unsupported wavelet format!")
    }
  }
  else {
    stop("Unsupported wavelet!")
  }
  oldLen <- length(ms)
  ms <- MSW.extendNBase(ms, nLevel = NULL, base = 2)
  len <- length(ms)
  nbscales <- length(scales)
  wCoefs <- NULL
  psi_xval <- psi_xval - psi_xval[1]
  dxval <- psi_xval[2]
  xmax <- psi_xval[length(psi_xval)]
  for (i in seq_along(scales)) {
    scale.i <- scales[i]
    f <- rep(0, len)
    j <- 1 + floor((0:(scale.i * xmax))/(scale.i * dxval))
    if (length(j) == 1)
      j <- c(1, 1)
    lenWave <- length(j)
    f[seq_len(lenWave)] <- rev(psi[j]) - mean(psi[j])
    if (length(f) > len)
    {i<-i-1;break;}   ##  stop(paste("scale", scale.i, "is too large!"))
    wCoefs.i <- 1/sqrt(scale.i) * convolve(ms, f)
    wCoefs.i <- c(wCoefs.i[(len - floor(lenWave/2) + 1):len],
                  wCoefs.i[seq_len(len - floor(lenWave/2))])
    wCoefs <- cbind(wCoefs, wCoefs.i)
  }
  if (i < 1) return(NA)
  scales <- scales[seq_len(i)]
  if (length(scales) == 1)
    wCoefs <- matrix(wCoefs, ncol = 1)
  colnames(wCoefs) <- scales
  wCoefs <- wCoefs[seq_len(oldLen), , drop = FALSE]
  wCoefs
}

#' @importFrom stats nextn
MSW.extendNBase <- function(x, nLevel=1, base=2, ...) { ## from package MassSpecWavelet
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  
  nR <- nrow(x)
  if (is.null(nLevel)) {
    nR1 <- nextn(nR, base)
  } else {
    nR1 <- ceiling(nR / base^nLevel) * base^nLevel
  }
  if (nR != nR1) {
    x <- MSW.extendLength(x, addLength=nR1-nR, ...)
  }
  x
}
MSW.extendLength <-   function(x, addLength=NULL, method=c('reflection', 'open', 'circular'), direction=c('right', 'left', 'both'))  {       ## from package MassSpecWavelet
  if (is.null(addLength)) stop('Please provide the length to be added!')
  if (!is.matrix(x)) x <- matrix(x, ncol=1)
  method <- match.arg(method)
  direction <- match.arg(direction)
  
  nR <- nrow(x)
  nR1 <- nR + addLength
  if (direction == 'both') {
    left <- right <- addLength
  } else if (direction == 'right') {
    left <- 0
    right <- addLength
  } else if (direction == 'left') {
    left <- addLength
    right <- 0
  }
  
  if (right > 0) {
    x <- switch(method,
                reflection =rbind(x, x[nR:(2 * nR - nR1 + 1), , drop=FALSE]),
                open = rbind(x, matrix(rep(x[nR,], addLength), ncol=ncol(x), byrow=TRUE)),
                circular = rbind(x, x[seq_len(nR1 - nR),, drop=FALSE]))
  }
  
  if (left > 0) {
    x <- switch(method,
                reflection =rbind(x[addLength:1, , drop=FALSE], x),
                open = rbind(matrix(rep(x[1,], addLength), ncol=ncol(x), byrow=TRUE), x),
                circular = rbind(x[(2 * nR - nR1 + 1):nR,, drop=FALSE], x))
  }
  if (ncol(x) == 1)  x <- as.vector(x)
  
  x
}
MSW.getLocalMaximumCWT <-  function(wCoefs, minWinSize=5, amp.Th=0)  {        ## from package MassSpecWavelet
  localMax <- NULL
  scales <- as.numeric(colnames(wCoefs))
  
  for (i in seq_along(scales)) {
    scale.i <- scales[i]
    winSize.i <- scale.i * 2 + 1
    if (winSize.i < minWinSize) {
      winSize.i <- minWinSize
    }
    temp <- MSW.localMaximum(wCoefs[,i], winSize.i)
    localMax <- cbind(localMax, temp)
  }
  ## Set the values less than peak threshold as 0
  localMax[wCoefs < amp.Th] <- 0
  colnames(localMax) <- colnames(wCoefs)
  rownames(localMax) <- rownames(wCoefs)
  localMax
}
MSW.localMaximum <-  function (x, winSize = 5)  {   ## from package MassSpecWavelet
  len <- length(x)
  rNum <- ceiling(len/winSize)
  
  ## Transform the vector as a matrix with column length equals winSize
  ## and find the maximum position at each row.
  y <- matrix(c(x, rep(x[len], rNum * winSize - len)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  
  ## keep the result
  localMax <- rep(0, len)
  localMax[(selInd-1) * winSize + y.maxInd[selInd]] <- 1
  
  ## Shift the vector with winSize/2 and do the same operation
  shift <- floor(winSize/2)
  rNum <- ceiling((len + shift)/winSize)
  y <- matrix(c(rep(x[1], shift), x, rep(x[len], rNum * winSize - len - shift)), nrow=winSize)
  y.maxInd <- apply(y, 2, which.max)
  ## Only keep the maximum value larger than the boundary values
  selInd <- which(apply(y, 2, function(x) max(x) > x[1] & max(x) > x[winSize]))
  localMax[(selInd-1) * winSize + y.maxInd[selInd] - shift] <- 1
  
  ## Check whether there is some local maxima have in between distance less than winSize
  maxInd <- which(localMax > 0)
  selInd <- which(diff(maxInd) < winSize)
  if (length(selInd) > 0) {
    selMaxInd1 <- maxInd[selInd]
    selMaxInd2 <- maxInd[selInd + 1]
    temp <- x[selMaxInd1] - x[selMaxInd2]
    localMax[selMaxInd1[temp <= 0]] <- 0
    localMax[selMaxInd2[temp > 0]] <- 0
  }
  
  localMax
}
MSW.getRidge <-  function(localMax, iInit=ncol(localMax), step=-1, iFinal=1, minWinSize=3, gapTh=3, skip=NULL)  {  ## modified from package MassSpecWavelet
  
  scales <- as.numeric(colnames(localMax))
  if (is.null(scales))  scales <- seq_len(ncol(localMax))
  
  maxInd_curr <- which(localMax[, iInit] > 0)
  nMz <- nrow(localMax)
  
  if (is.null(skip)) {
    skip <- iInit + 1
  }
  
  ## Identify all the peak pathes from the coarse level to detail levels (high column to low column)
  ## Only consider the shortest path
  if ( ncol(localMax) > 1 ) colInd <- seq(iInit+step, iFinal, step)
  else colInd <- 1
  ridgeList <- as.list(maxInd_curr)
  names(ridgeList) <- maxInd_curr
  peakStatus <- as.list(rep(0, length(maxInd_curr)))
  names(peakStatus) <- maxInd_curr
  
  ## orphanRidgeList keep the ridges disconnected at certain scale level
  ## Changed by Pan Du 05/11/06
  orphanRidgeList <- NULL
  orphanRidgeName <- NULL
  nLevel <- length(colInd)
  
  for (j in seq_len(nLevel)) {
    col.j <- colInd[j]
    scale.j <- scales[col.j]
    
    if (colInd[j] == skip) {
      oldname <- names(ridgeList)
      ridgeList <- lapply(ridgeList, function(x) c(x, x[length(x)]))
      ##peakStatus <- lapply(peakStatus, function(x) c(x, x[length(x)]))
      names(ridgeList) <- oldname
      ##names(peakStatus) <- oldname
      next
    }
    
    if (length(maxInd_curr) == 0) {
      maxInd_curr <- which(localMax[, col.j] > 0)
      next
    }
    
    ## The slide window size is proportional to the CWT scale
    ## winSize.j <- scale.j / 2 + 1
    winSize.j <- floor(scale.j/2)
    if (winSize.j < minWinSize) {
      winSize.j <- minWinSize
    }
    
    selPeak.j <- NULL
    remove.j <- NULL
    for (k in seq_along(maxInd_curr)) {
      ind.k <- maxInd_curr[k]
      start.k <- ifelse(ind.k-winSize.j < 1, 1, ind.k-winSize.j)
      end.k <- ifelse(ind.k+winSize.j > nMz, nMz, ind.k+winSize.j)
      ind.curr <- which(localMax[start.k:end.k, col.j] > 0) + start.k - 1
      ##ind.curr <- which(localMax[, col.j] > 0)
      if (length(ind.curr) == 0) {
        status.k <- peakStatus[[as.character(ind.k)]]
        ## bug  work-around
        if (is.null(status.k)) status.k <- gapTh +1
        ##
        if (status.k > gapTh & scale.j >= 2) {
          temp <- ridgeList[[as.character(ind.k)]];
          
          if(length(temp)-status.k <= 0) next
          
          orphanRidgeList <- c(orphanRidgeList, list(temp[seq_len(length(temp)-status.k)]))
          orphanRidgeName <- c(orphanRidgeName, paste(col.j + status.k + 1, ind.k, sep='_'))
          remove.j <- c(remove.j, as.character(ind.k))
          next
        } else {
          ind.curr <- ind.k
          peakStatus[[as.character(ind.k)]] <- status.k + 1
        }
      } else {
        peakStatus[[as.character(ind.k)]] <- 0
        if (length(ind.curr) >= 2)  ind.curr <- ind.curr[which.min(abs(ind.curr - ind.k))]
      }
      ridgeList[[as.character(ind.k)]] <- c(ridgeList[[as.character(ind.k)]], ind.curr)
      selPeak.j <- c(selPeak.j, ind.curr)
    }
    ## Remove the disconnected lines from the currrent list
    if (length(remove.j) > 0) {
      removeInd <- which(names(ridgeList) %in% remove.j)
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Check for duplicated selected peaks and only keep the one with the longest path.
    dupPeak.j <- unique(selPeak.j[duplicated(selPeak.j)])
    if (length(dupPeak.j) > 0) {
      removeInd <- NULL
      for (dupPeak.jk in dupPeak.j) {
        selInd <- which(selPeak.j == dupPeak.jk)
        selLen <- sapply(ridgeList[selInd], length)
        removeInd.jk <- which.max(selLen)
        removeInd <- c(removeInd, selInd[-removeInd.jk])
        orphanRidgeList <- c(orphanRidgeList, ridgeList[removeInd.jk])
        orphanRidgeName <- c(orphanRidgeName, paste(col.j, selPeak.j[removeInd.jk], sep='_'))
      }
      selPeak.j <- selPeak.j[-removeInd]
      ridgeList <- ridgeList[-removeInd]
      peakStatus <- peakStatus[-removeInd]
    }
    
    ## Update the names of the ridgeList as the new selected peaks
    ##if (scale.j >= 2) {
    if (length(ridgeList) > 0) names(ridgeList) <- selPeak.j
    if (length(peakStatus) > 0) names(peakStatus) <- selPeak.j
    ##}
    
    ## If the level is larger than 3, expand the peak list by including other unselected peaks at that level
    if (scale.j >= 2) {
      maxInd_next <- which(localMax[, col.j] > 0)
      unSelPeak.j <- maxInd_next[!(maxInd_next %in% selPeak.j)]
      newPeak.j <- as.list(unSelPeak.j)
      names(newPeak.j) <- unSelPeak.j
      ## Update ridgeList
      ridgeList <- c(ridgeList, newPeak.j)
      maxInd_curr <- c(selPeak.j, unSelPeak.j)
      ## Update peakStatus
      newPeakStatus <- as.list(rep(0, length(newPeak.j)))
      names(newPeakStatus) <- newPeak.j
      peakStatus <- c(peakStatus, newPeakStatus)
    } else {
      maxInd_curr <- selPeak.j
    }
  }
  
  ## Attach the peak level at the beginning of the ridge names
  if (length(ridgeList) > 0) names(ridgeList) <- paste(1, names(ridgeList), sep='_')
  if (length(orphanRidgeList) > 0) names(orphanRidgeList) <- orphanRidgeName
  ## Combine ridgeList and orphanRidgeList
  ridgeList <- c(ridgeList, orphanRidgeList)
  if (length(ridgeList) == 0) return(NULL)
  
  ## Reverse the order as from the low level to high level.
  ridgeList <- lapply(ridgeList, rev)
  ## order the ridgeList in increasing order
  ##ord <- order(selPeak.j)
  ##ridgeList <- ridgeList[ord]
  
  ## Remove possible duplicated ridges
  ridgeList <- ridgeList[!duplicated(names(ridgeList))]
  
  attr(ridgeList, 'class') <- 'ridgeList'
  attr(ridgeList, 'scales') <- scales
  return(ridgeList)
}
valueCount2ScanIndex <- function(valCount){
  ## Convert into 0 based.
  valCount <- cumsum(valCount)
  return(as.integer(c(0, valCount[-length(valCount)])))
}
descendMinTol <- function(d,startpos,maxDescOutlier) {
  l <- startpos[1]; r <- startpos[2]; outl <- 0; N <- length(d)
  ## left
  while ((l > 1) && (d[l] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- l
    if (d[l-1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- l
    l <- l -1
  }
  if (outl > 0) l <- l + outl
  ## right
  outl <- 0;
  while ((r < N) && (d[r] > 0) && outl <= maxDescOutlier) {
    if (outl > 0) vpos <- opos else vpos <- r
    if (d[r+1] > d[vpos]) outl <- outl + 1 else outl <- 0
    if (outl == 1) opos <- r
    r <- r + 1
  }
  if (outl > 0) r <- r - outl
  c(l,r)
}
joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range, peaks, maxGaussOverlap=0.5) {
  ## Fix issue #284: avoid having identical peaks multiple times in this
  ## matrix.
  peaks <- unique(peaks)
  gausspeaksidx <- which(!is.na(peaks[,"mu"]))
  Ngp <- length(gausspeaksidx)
  if (Ngp == 0)
    return(peaks)
  
  newpeaks <- NULL
  
  gpeaks <- peaks[gausspeaksidx, , drop = FALSE]
  if (nrow(peaks) - Ngp > 0)
    notgausspeaks <- peaks[-gausspeaksidx, , drop = FALSE]
  
  if (Ngp > 1) {
    comb <- which(upper.tri(matrix(0, Ngp, Ngp)), arr.ind = TRUE)
    overlap <- logical(nrow(comb))
    overlap <- rep(FALSE, dim(comb)[1])
    for (k in seq_len(nrow(comb))) {
      p1 <- comb[k, 1]
      p2 <- comb[k, 2]
      overlap[k] <- gaussCoverage(xlim = scan.range,
                                  h1 = gpeaks[p1, "h"],
                                  mu1 = gpeaks[p1, "mu"],
                                  s1 = gpeaks[p1, "sigma"],
                                  h2 = gpeaks[p2, "h"],
                                  mu2 = gpeaks[p2, "mu"],
                                  s2 = gpeaks[p2, "sigma"]) >=
        maxGaussOverlap
    }
  } else overlap <- FALSE
  
  if (any(overlap) && (Ngp > 1)) {
    jlist <- list()
    if (length(which(overlap)) > 1) {
      gm <- comb[overlap, ]
      ## create list of connected components
      cc <- list()
      cc[[1]] <- gm[1,] ## copy first entry
      for (j in 2:dim(gm)[1]) { ## search for connections
        ccl <- unlist(cc)
        nl <- sapply(cc, function(x) length(x))
        ccidx <- rep(seq_along(nl),nl)
        idx <- match(gm[j,],ccl)
        if (any(!is.na(idx))) { ## connection found, add to list
          pos <- ccidx[idx[which(!is.na(idx))[1]]]
          cc[[pos]] <- c(cc[[pos]],gm[j,])
        } else  ## create new list element
          cc[[length(cc) + 1]] <- gm[j,]
        
      }
      ccn <- list()
      lcc <- length(cc)
      ins <- rep(FALSE,lcc)
      if (lcc > 1) {
        jcomb <- which(upper.tri(matrix(0,lcc,lcc)),arr.ind = TRUE)
        for (j in seq_len(dim(jcomb)[1])) {
          j1 <- jcomb[j,1]; j2 <- jcomb[j,2]
          if (any(cc[[j1]] %in% cc[[j2]]))
            ccn[[length(ccn) +1]] <- unique(c(cc[[j1]],cc[[j2]]))
          else {
            if (!ins[j1]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j1]])
              ins[j1] <- TRUE
            }
            if (!ins[j2]) {
              ccn[[length(ccn) +1]] <- unique(cc[[j2]])
              ins[j2] <- TRUE
            }
          }
        }
      } else ccn <- cc;
      
      size <- sapply(ccn, function(x) length(x))
      s2idx <- which(size >= 2)
      
      if (length(s2idx) > 0) {
        for (j in seq_along(s2idx)) {
          pgroup <- unique(ccn[[ s2idx[j] ]])
          jlist[[j]] <- pgroup
        }
      } else stop('(length(s2idx) = 0) ?!?')
    } else jlist[[1]] <- comb[overlap, ]
    
    ## join all peaks belonging to one cc
    for (j in seq_along(jlist)) {
      jidx <- jlist[[j]]
      newpeak <- gpeaks[jidx[1], , drop = FALSE]
      newmin <- min(gpeaks[jidx, "lmin"])
      newmax <- max(gpeaks[jidx, "lmax"])
      newpeak[1, "scpos"] <- -1 ## not defined after join
      newpeak[1, "scmin"] <- -1 ##    ..
      newpeak[1, "scmax"] <- -1 ##    ..
      newpeak[1, "scale"] <- -1 ##    ..
      
      newpeak[1, "maxo"] <- max(gpeaks[jidx, "maxo"])
      newpeak[1, "sn"]   <- max(gpeaks[jidx, "sn"])
      newpeak[1, "lmin"] <- newmin
      newpeak[1, "lmax"] <- newmax
      newpeak[1, "rtmin"] <- scantime[td[newmin]]
      newpeak[1, "rtmax"] <- scantime[td[newmax]]
      newpeak[1,"rt"] <- weighted.mean(gpeaks[jidx, "rt"],
                                       w = gpeaks[jidx, "maxo"])
      
      ## Re-assign m/z values
      p1 <- match(td[newmin], otd)[1]
      p2 <- match(td[newmax], otd)
      p2 <- p2[length(p2)]
      if (is.na(p1)) p1 <- 1
      if (is.na(p2)) p2 <- length(omz)
      mz.value <- omz[p1:p2]
      mz.int <- od[p1:p2]
      
      ## re-calculate m/z value for peak range
      #mzmean <- do.call(mzCenterFun, list(mz = mz.value,
      #                                    intensity = mz.int))
      mzmean <- weighted.mean(mz.value, mz.int)
      mzrange <- range(mz.value)
      newpeak[1, "mz"] <- mzmean
      newpeak[1, c("mzmin","mzmax")] <- mzrange
      
      ## re-fit gaussian
      md <- max(d[newmin:newmax])
      d1 <- d[newmin:newmax] / md
      pgauss <- fitGauss(td[newmin:newmax],
                         d[newmin:newmax],
                         pgauss = list(mu = td[newmin] +
                                         (