R/Optimize_params.R

Defines functions peakwidth_est ppmEstCore filterPpmError estimateSNThresh estimatePPM findTruePeaks filterPeaksfromNoise checkParams attachList encode decodeAll decode createModel findIsotopes.CAMERA testSinusDistribution testNormalDistribution getIntensitiesFromRawdata checkNormalDistribution checkSinusDistribution checkIntensitiesAtRtBoundaries findIsotopes.IPO getRGTVValues combineParams getCcdParameter typeCastParams expand.grid.subset getMaxSettings getMaximumLevels calcMaximumCarbon peaks_IPO getClusterType resultIncreased_doe extractPeakForGaussian extFUN SSgaussStats calcGaussianS calcRCS_GSValues calcCV calcPPS2 calculatePPKs calculateSet_doe SlaveCluster_doe Statistic_doe ExperimentsCluster_doe optimizxcms.doe.peakpicking optimize.xcms.doe PerformParamsOptimization

Documented in PerformParamsOptimization

#' @title Perform Parameters Optimization
#' @description This function is used to optimize the critical 
#' parameters of peak picking and alignment for 
#' the following data processing. It utilizes the trimed data and 
#' the internal instrument-specific parameters.
#' Parallel computing will be performed. The number of cores user 
#' want to use could be specified.
#' @param mSet mSet object, usually generated by 'PerformROIExtraction' 
#' or 'PerformDataTrimming' here.
#' @param param List, Parameters defined by 'SetPeakParam' function.
#' @param method Character, method of parameters optimization, including 
#' "DoE' only. Default is "DoE". Other method 
#' is under development.
#' @param ncore Numeric, CPU threads number used to perform the parallel 
#' based optimization. If thers is memory issue,
#' please reduce the 'ncore' used here. For default, 2/3 CPU threads of 
#' total will be used.
#' @param running.controller The resuming pipeline running controller. Optional. 
#' Don't need to define by hand.
#' @export
#' @import MSnbase
#' @import progress
#' @import parallel
#' @return will a parameter object can be used for following processing
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)
#' @examples 
#' 
#' DataFiles <- dir(system.file("mzData", package = "mtbls2"), full.names = TRUE,
#'                  recursive = TRUE)[c(10:12, 14:16)]
#' # remove the # before running the following command lines
#' # mSet <- PerformROIExtraction(datapath = DataFiles[c(1:2)],rt.idx = 0.25,
#' # rmConts = FALSE);#' 
#' # best_params <- PerformParamsOptimization(mSet, param = SetPeakParam(), ncore = 4);


PerformParamsOptimization <- function(mSet, 
                                      param= NULL, 
                                      method="DoE", 
                                      ncore=4, 
                                      running.controller=NULL){
  
  if (is(mSet,"mSet")) {
    raw_data <- mSet@rawInMemory;
  } else if (is(mSet,"MSnExp")) {
    raw_data <- mSet;
  } else {
    stop("Wrong mSet object provided !")
  }
  
  if(!exists(".SwapEnv")){
    .SwapEnv <<- new.env(parent = .GlobalEnv);
    .SwapEnv$.optimize_switch <- FALSE;
    .SwapEnv$count_current_sample <- 0;
    .SwapEnv$count_total_sample <- 120; # maximum number for on.public.web
    .SwapEnv$envir <- new.env();
  }
  
  if(is.null(.SwapEnv$GaussModel)){
    .SwapEnv$GaussModel <- GaussModel;
  }
  
  if(missing(param) | is.null(param)) {
    param <- SetPeakParam();
  }
  .optimize_switch <- .SwapEnv$.optimize_switch <- TRUE;
  
  #Build Running plan for optimization - Indentify the controller
  if (is.null(running.controller)) {
    c1 <- TRUE;
    .running.as.plan <- FALSE;
  } else {
    c1 <- running.controller@others_1[["c1"]];
    .running.as.plan <- TRUE;
  }
  
  MessageOutput("\nStep 1/6: Start to optimize parameters! 
                \nThis step may take a long time...", "\n", NULL)

  start.time<-Sys.time();
  
  if (c1){
    
    if (missing(param)){
      stop("Please provide the param with 'SetPeakParam' function !")
    } else if (missing(raw_data)){
      stop("Please provide the data of MSnExp format!" )
    } else if (missing(method)){
      method<-"DoE";
    };
    
    MessageOutput(paste0("DoE Optimization Starting Now... (",
                         Sys.time(),")"), "\n", NULL)
    
    if (.on.public.web()){
      ncore <- 2;
    } else if (missing(ncore)){
      ncore <- ceiling(detectCores()*2/3);
      message("'ncore' is absent, will use 2/3 CPU threads of total!")
    };
    
    # Define the global Parallel cores for Single Core function mode
    if (ncore == 1){
      if (.Platform$OS.type=="unix"){
        register(bpstart(MulticoreParam(ncore)))
      } else {
        register(bpstart(SnowParam(ncore)))
      };
    }
    
    ## Optimize the noise and prefilter indexes with AutoTuner
    if (param[["Peak_method"]] == "centWave"){
      MessageOutput("Evaluating Noise level...", "\n", NULL)
      save(raw_data, file = "raw_data_noise.rda")
      p2 <- tryCatch(
        Noise_evaluate(raw_data),
        error = function(e) {
          e
        }
      )
      MessageOutput(paste0("ppm is estimated as : ", p2$ppm), "\n")
      if (is(p2,"simpleError")) {
        
          print_mes_tmp <-
            paste0("\n",
                   "<font color=\"red\">",
                   "ERROR:",
                   p2$message,
                   "</font>",
                   "\n")
          
          print_mes_tmp2 <-
            paste0(
              "<font color=\"orange\">",
              "Don't worry: Will use default noise parameters instead!",
              "</font>"
            )
          
          print_mes <- paste0(print_mes_tmp, print_mes_tmp2)
          MessageOutput(print_mes, "\n", NULL)
          
      } else {
        MessageOutput("Done!", "\n", NULL)
      }
      
      
      if(is(p2,"simpleError")){
        
        #param[["ppm"]]<-15;
        param[["noise"]]<-100;
        param[["prefilter"]]<-3;
        param[["value_of_prefilter"]]<-10;
        
      } else {
        
        if(!is.nan(p2$ppm) & !is.infinite(p2$ppm)){
          p2[["ppm"]]<-round(p2$ppm,2);
        } else {
          p2[["ppm"]] <- param[["ppm"]];
        }
        if(!is.nan(p2$noise) & !is.infinite(p2$noise)){
          p2[["noise"]]<-round(p2$noise,2);
        } else {
          p2[["noise"]] <- 100;
        }
        if(!is.nan(p2$prefilter) & 
           !is.infinite(p2$prefilter)){
          p2[["prefilter"]]<-round(p2$prefilter,2);
        } else {
          p2[["prefilter"]] <- 3;
        }
        if(!is.nan(p2$value_of_prefilter) & 
           !is.infinite(p2$value_of_prefilter)){
          p2[["value_of_prefilter"]]<-round(p2$value_of_prefilter,2);
        } else {
          p2[["value_of_prefilter"]] <- 10;
        }
        
        param[["ppm"]]<-round(p2$ppm,2);
        param[["noise"]]<-round(p2$noise,2);
        param[["prefilter"]]<-round(p2$prefilter,2);
        param[["value_of_prefilter"]]<-round(p2$value_of_prefilter,2);
        
        if(round(p2$ppm,2) < 1 | round(p2$ppm,2) > 100){
          param[["ppm"]] <- param[["ppm"]]; # Estimation failure, use the default instead !
        } else {
          param[["ppm"]] <- round(p2$ppm,2);
        }
        
        if(round(p2$prefilter,2) < 2){
          param[["prefilter"]] <- 3; # Estimation failure, use the default instead !
        } else {
          param[["prefilter"]] <- round(p2$prefilter,2);
        }
        
        if(round(p2$value_of_prefilter,2) < 10){
          param[["value_of_prefilter"]] <- 10; 
          # Estimation failure, use the default instead !
        } else {
          param[["value_of_prefilter"]] <- round(p2$value_of_prefilter,2);
        }
        
        if(round(p2$noise,2) < 0){
          param[["noise"]] <- 100; # Estimation failure, use the default instead !
        } else {
          param[["noise"]] <- round(p2$noise,2);
        }
        
      }
      
      MessageOutput(NULL, NULL, 5.00);
    }
    
    if (method=="DoE"){
      p1 <- optimize.xcms.doe(raw_data,param=param,ncore=ncore); 
    };
    
    if(.running.as.plan){
      cache.save(p1, funpartnm= "others_c1");
      marker_record("others_c1");
    }

  } else {
    p1 <- cache.read ("others","c1");
    marker_record("others_c1");
  }
  
  if (.on.public.web()){
    MessageOutput(NULL, NULL, 20.00);
    
    if (is(p1,"simpleError")) {
      print_mes <-
        paste0("<font color=\"red\">",
               "\nERROR:",
               p1$message,
               "</font>",
               "\n")
      
      print_mes_tmp2 <-
        paste0(
          "<font color=\"orange\">",
          "ADVICE: Please follow the methods above to correct 
          or use the default instead!",
          "</font>"
        )
      
      print_mes <- paste0(print_mes, print_mes_tmp2)
      
      MessageOutput(print_mes, "\n", NULL)
      
      stop("EXCEPTION POINT CODE: PO2")
      
    }
    
  } else {
    end.time<-Sys.time();
    message("Time Spent In Total:",round((as.numeric(end.time) - 
                                            as.numeric(start.time))/60, 1),"mins");
  }
  
  best_params <- p1;
  
  if(.on.public.web()) {
    save(best_params, file = "best_params.rda");
    return(best_params);
  } else {
    return(best_params);
  }
}

#' @title Overall Funtion for DoE
#' @description This function is the overall function to handle the 
#' starting of the optimization process and 
#' pre-define the parameters' range according to the input of the 
#' parameters for following Design of Experiment (DoE).
#' @param raw_data MSnExp object, The trimmed or original data input 
#' for optimization.
#' @param param List, the parameters lists set by 'SetPeakParam' function. 
#' The noise, prefilter and ppm values should 
#' be defined by AutoTuner in the previous steps.
#' @param ncore Numeric, core number used to perform the parallel 
#' based optimization. 
#' @noRd
#' @import MSnbase
#' @import progress
#' @import parallel
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

optimize.xcms.doe <- function(raw_data, param, ncore = 8){
  #### Parameters Setting for optimization!------
  if (is.null(param)){
    stop("Please provide the param with 'SetPeakParam' function !")
  } else {Parameters<-param};
  
  ## Define the range of the parameters for optimization
  if (Parameters$Peak_method=="centWave" && Parameters$RT_method=="loess"){
    ## Keep these Parameters
    Parameters$value_of_prefilter <- Parameters$value_of_prefilter;
    ## Parameters for peak picking
    Parameters$max_peakwidth <- c(Parameters$max_peakwidth*0.75,
                                  Parameters$max_peakwidth*1.25);
    Parameters$min_peakwidth <- c((Parameters$min_peakwidth)*0.75,
                                  (Parameters$min_peakwidth)*1.25);
    #Parameters$ppm <- c(Parameters$ppm*0.5,Parameters$ppm*1.5)
    Parameters$mzdiff <- c(-Parameters$mzdiff*1.2, 
                           Parameters$mzdiff*1.2);
    Parameters$snthresh<-c(Parameters$snthresh*0.75,
                           Parameters$snthresh*1.25);
    ## Parameters for Alignment
    Parameters$bw<-c(Parameters$bw*0.5,Parameters$bw*1.5);
  } else 
    if (Parameters$Peak_method=="centWave" && 
        Parameters$RT_method=="obiwarp"){
      ## Keep these Parameters
      Parameters$value_of_prefilter <- Parameters$value_of_prefilter;
      ## Parameters for peak picking
      Parameters$max_peakwidth <- c(Parameters$max_peakwidth*0.75,
                                    Parameters$max_peakwidth*1.25)
      Parameters$min_peakwidth <- c((Parameters$min_peakwidth)*0.75,
                                    (Parameters$min_peakwidth)*1.25)
      #Parameters$ppm <- c(1,Parameters$ppm*2);
      Parameters$mzdiff <- c(-Parameters$mzdiff*2, 
                             Parameters$mzdiff*2);
      Parameters$snthresh<-c(Parameters$snthresh*0.75,
                             Parameters$snthresh*1.25)
      ## Parameters for Alignment
      Parameters$bw<-c(Parameters$bw*0.5,Parameters$bw*1.5)
    } else 
      if (Parameters$Peak_method=="matchedFilter" && 
          Parameters$RT_method=="loess"){
        ## Parameters for peak picking
        Parameters$fwhm <- c((Parameters$fwhm)*0.8,
                             (Parameters$fwhm)*1.2)
        Parameters$sigma <- c(Parameters$sigma*0.8,
                              Parameters$sigma*1.2)
        Parameters$steps <- c(Parameters$steps-1,
                              Parameters$steps+1)
        Parameters$mzdiff <- c(-Parameters$mzdiff*2, 
                               Parameters$mzdiff*2)
        Parameters$snthresh<-c(1,Parameters$snthresh*10)
        ## Parameters for Alignment
        Parameters$bw<-c(Parameters$bw*0.5,
                         Parameters$bw*1.5)
      } else 
        if (Parameters$Peak_method=="matchedFilter" && 
            Parameters$RT_method=="obiwarp"){
          ## Parameters for peak picking
          Parameters$fwhm <- c((Parameters$fwhm)*0.8,
                               (Parameters$fwhm)*1.2)
          Parameters$sigma <- c(Parameters$sigma*0.8,
                                Parameters$sigma*1.2)
          Parameters$steps <- c(Parameters$steps-1,
                                Parameters$steps+1)
          Parameters$mzdiff <- c(-Parameters$mzdiff*2, 
                                 Parameters$mzdiff*2)
          Parameters$snthresh<-c(1,Parameters$snthresh*10)
          ## Parameters for Alignment
          Parameters$bw<-c(Parameters$bw*0.5,
                           Parameters$bw*1.5)
        } else {
          stop("There must be something wrong about the 
               Peak_method value in your primary params set !")
        };
  
  MessageOutput("Preparing Parameters for optimization finished !", "\n", NULL);

  if (.Platform$OS.type=="unix"){
    bp <- MulticoreParam(ncore);
  } else {
    bp <- SnowParam(ncore);
  };
  
  #### Start to Optimize !
  result <- optimizxcms.doe.peakpicking(object = raw_data, 
                                        params = Parameters, 
                                        BPPARAM = bp,
                                        nSlaves = ncore, 
                                        subdir = NULL, 
                                        plot = FALSE);
  
  optimizedxcmsObject <- result$best_settings$xset;
  
  ##Parameters Out-put
  if (length(result) > 0) {
    if (!is.null(result[["best_settings"]][["parameters"]])) {
      best_parameters <- result[["best_settings"]][["parameters"]]
    } else{
      return(param)
    }
  } else{
    return(param)
  }
  
  MessageOutput(paste0("Step 1/6: Parameters Optimization Finished ! (",
                       Sys.time(),")"), "\n", NULL);

  return(best_parameters)
}

#' @title Core Optimization Function of DoE
#' @description This function is the core for parameters optimization 
#' with Design of Experiment (DoE)
#' method.
#' @param object MSnExp object, the trimmed or the original data.
#' @param params List, the parameters lists set by 'SetPeakParam' function. 
#' The noise, prefilter and ppm values should 
#' be defined by AutoTuner in the previous steps.
#' @param nSlaves Numeric, core number used to perform the parallel based optimization.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. 
#' Default is bpparam().
#' @param plot Logical, weather to plot the Contours plots of the DoE results.
#' @param ... Other parameters.
#' @noRd
#' @import MSnbase
#' @import progress
#' @import parallel
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

optimizxcms.doe.peakpicking <- function(object = NULL, 
                                        params = params, 
                                        BPPARAM = bpparam(), 
                                        nSlaves = 4, 
                                        plot = FALSE,...) {
  
  isotopeIdentification = c("IPO");
  centWave <- is.null(params$fwhm)  ;
  history <- list();
  iterator = 1;
  best_range <- 0.25;
  
  object_mslevel<-PeakPicking_prep(object);
  
  while(iterator < 20) {#Forcely stop to ensure the reasonability!
    MessageOutput(paste0("Round:",iterator, "\nDoE Running Begin..."), 
                  "\n", NULL)

    mSet_OPT <- 
      ExperimentsCluster_doe(
        object = object,
        object_mslevel=object_mslevel,
        params = params,
        isotopeIdentification = isotopeIdentification,
        BPPARAM = BPPARAM,
        nSlaves = nSlaves,
        iterator = iterator
      )
    
    MessageOutput(paste0("100% |"), "\n", NULL);
    
    ### Normalize the PPS and CV in mSet_OPT
    PPS.set<-as.numeric(sapply(seq_len(nrow(mSet_OPT[["response"]])),
                               FUN=function(x){
      mSet_OPT[["response"]][x,5]
    }));
    
    CV.set<-as.numeric(sapply(seq_len(nrow(mSet_OPT[["response"]])),
                              FUN=function(x){
      mSet_OPT[["response"]][x,6]
    }))
    
    RCS.set<-as.numeric(sapply(seq_len(nrow(mSet_OPT[["response"]])),
                               FUN=function(x){
      mSet_OPT[["response"]][x,7]
    }))
    
    GS.set<-as.numeric(sapply(seq_len(nrow(mSet_OPT[["response"]])),
                              FUN=function(x){
      mSet_OPT[["response"]][x,8]
    }))
    
    GaussianSI.set<-as.numeric(sapply(seq_len(nrow(mSet_OPT[["response"]])),
                                      FUN=function(x){
      mSet_OPT[["response"]][x,9]
    }))
    
    index.set<-list(CV=CV.set,
                    RCS=RCS.set,
                    GS=GS.set,
                    GaussianSI=GaussianSI.set)
    
    # Normalize the index
    CV.set.normalized<-(CV.set-min(CV.set))/(max(CV.set)-min(CV.set))
    RCS.set.normalized<-(RCS.set-min(RCS.set))/(max(RCS.set)-min(RCS.set))
    GS.set.normalized<-(GS.set-min(GS.set))/(max(GS.set)-min(GS.set))
    GaussianSI.set.normalized<-GaussianSI.set
    
    # Calculate QCoE
    QCoE<-CV.set.normalized*0.2+RCS.set.normalized*0.4+
      GS.set.normalized*0.4
    # Calculate QS
    QS<-PPS.set*QCoE*GaussianSI.set.normalized;
    
    tmp_matrix<-mSet_OPT[["response"]];
    tmp_matrix<-cbind(tmp_matrix,PPS.set,CV.set.normalized,
                      RCS.set.normalized,GS.set.normalized,
                      GaussianSI.set.normalized,QCoE,QS)
    mSet_OPT[["response"]]<-tmp_matrix;
    
    MessageOutput(paste0("Round ",iterator," Finished !"), "\n", NULL)
    
    mSet_OPT <-
      tryCatch(Statistic_doe(
        object = object,
        object_mslevel=object_mslevel,
        isotopeIdentification = isotopeIdentification,
        BPPARAM = BPPARAM,
        subdir = NULL,
        plot = FALSE,
        mSet_OPT = mSet_OPT,
        iterator = iterator,
        index.set = index.set,
        useNoise = params[["noise"]]
      ), error = function(e) e)
    
    if(is(mSet_OPT,"simpleError")){
      MessageOutput("Optimization Failed: Too few peaks in your data ! 
                    Will use default parameters for processing!","\n")
      break;
    }
    
    history[[iterator]] <- mSet_OPT     
    
    params <- mSet_OPT$params
    
    increaRes <- resultIncreased_doe(history);
    if(!is.null(increaRes)){
      if(!increaRes) {
        
        MessageOutput("No Increase Stopping !")
        
        maxima <- 0
        max_index <- 1
        for(i in seq_along(history)) {
          ## TODO: need to think more on this discrimination criteria: based on max setting or max QS?
          if(history[[i]]$max_settings[1] > maxima) {
            maxima <- history[[i]]$max_settings[1]
            max_index <- i
          }
        }
        
        xcms_parameters <- mSet_OPT$OptiParams;
        
        #   as.list(decodeAll(history[[max_index]]$max_settings[-1],
        #                     history[[max_index]]$params$to_optimize))      
        # 
        # xcms_parameters <- combineParams(xcms_parameters, 
        #                                  params$no_optimization)
        # 
        # if(!is.list(xcms_parameters))
        #   xcms_parameters <- as.list(xcms_parameters)
        # 
        # # deal with the too narrow peak width issue
        # pkmin <- xcms_parameters$min_peakwidth;
        # pkmax <- xcms_parameters$max_peakwidth;
        # 
        # if(abs(pkmax - pkmin) < 5 & pkmin > 5){
        #   xcms_parameters$max_peakwidth <- pkmax + 2.5;
        #   xcms_parameters$min_peakwidth <- pkmin - 2.5;
        # } else if (abs(pkmax - pkmin) < 5 & pkmin < 5) {
        #   xcms_parameters$max_peakwidth <- pkmax + 5;
        # }
        
        best_settings <- list()
        best_settings$parameters <- xcms_parameters
        #best_settings$xset <- history[[max_index]]$xset
        
        #target_value <- history[[max_index]]$QS 
        #best_settings$result <- target_value
        history$best_settings <- best_settings
        
        if(!.on.public.web()){
          message("best parameter settings:")
          cat(paste(rbind(paste(names(xcms_parameters), sep="", ": "), 
                              paste(xcms_parameters, sep="", "\n")), sep=""))
        }
        return(history)
      }
    } else {
      MessageOutput("Optimization Failed: Too few peaks in your data ! Will use default parameters for processing!","\n")
      break;
    }
    
    for(i in seq_along(params$to_optimize)) {
      parameter_setting <- mSet_OPT$max_settings[i+1]
      bounds <- params$to_optimize[[i]] 
      fact <- names(params$to_optimize)[i]
      
      min_factor <- 
        ifelse(fact=="min_peakwidth", 3, 
               ifelse(fact=="mzdiff", 
                      ifelse(centWave,-0.02, 0.001), 
                      ifelse(fact=="step",0.0005,
                             ifelse(fact=="bw",2,
                                    ifelse(fact=="snthresh",5,1)))))
      
      step_factor <- 
        ifelse(is.na(parameter_setting), 1.2, 
               ifelse((abs(parameter_setting) < best_range),  0.8, 
                      ifelse(parameter_setting==-1 & 
                               decode(-1, params$to_optimize[[i]]) ==
                               min_factor, 0.8, 1)))
      
      step <- (diff(bounds) / 2) * step_factor
      
      if(is.na(parameter_setting))
        parameter_setting <- 0
      
      new_center <- decode(parameter_setting, bounds)
      
      if((new_center-min_factor) > step) {
        new_bounds <- c(new_center - step, new_center + step) 
      } else {
        new_bounds <- c(min_factor, 2*step+min_factor) 
      }      
      
      names(new_bounds) <- NULL         
      
      if(names(params$to_optimize)[i] == "steps" | 
         names(params$to_optimize)[i] == "prefilter") {
        params$to_optimize[[i]] <- round(new_bounds, 0)
      } else { 
        params$to_optimize[[i]] <- new_bounds
      }
    } 
    
    if(centWave) {
      
      if(!is.null(params$to_optimize$min_peakwidth) | 
         !is.null(params$to_optimize$max_peakwidth)) {
        
        pw_min <- 
          ifelse(is.null(params$to_optimize$min_peakwidth), 
                 params$no_optimization$min_peakwidth, 
                 max(params$to_optimize$min_peakwidth))
        
        pw_max <- 
          ifelse(is.null(params$to_optimize$max_peakwidth), 
                 params$no_optimization$max_peakwidth, 
                 min(params$to_optimize$max_peakwidth))
        
        if(pw_min >= pw_max) {
          additional <- abs(pw_min-pw_max) + 1
          
          if(!is.null(params$to_optimize$max_peakwidth)) {
            params$to_optimize$max_peakwidth <- 
              params$to_optimize$max_peakwidth + additional
          } else {
            params$no_optimization$max_peakwidth <- 
              params$no_optimization$max_peakwidth + additional
          }
          
        }
      }
      
    }
    
    params <- attachList(params$to_optimize, params$no_optimization)
    iterator <- iterator + 1
    
  }
  
  MessageOutput(NULL, NULL, 19);
  params <- attachList(params$to_optimize, params$no_optimization)
  
  return(history)
}

#' @title Experiment Functions of DoE
#' @description This function is used to perform the test with Design of Experiment (DoE) on the parameters dataset.
#' @param object MSnExp object, the trimmed or the original data.
#' @param object_mslevel List, the parsed metabolomics scans produced by PeakPicking_prep.
#' @param params parameters set for optimization
#' @param isotopeIdentification Character, IsotopeIdentidication method, usually includes 'IPO' and 'CAMERA'.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @param nSlaves Numeric, core number used to perform the parallel based optimization.
#' @param iterator round of DoE
#' @noRd
#' @import MSnbase
#' @importFrom rsm decode.data ccd rsm
#' @import progress
#' @import parallel
#' @importFrom parallel clusterExport
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

ExperimentsCluster_doe <-function(object, object_mslevel,params, 
                                  isotopeIdentification, BPPARAM = bpparam(),nSlaves=4, iterator) { 
  
  # To form a parameters combination table
  typ_params <- typeCastParams(params) 
  
  if(length(typ_params$to_optimize)>1) {
    
    design <- getCcdParameter(typ_params$to_optimize);
    param_design <- rsm::decode.data(design);
    
  } else {
    design <- data.frame(run.order=seq_len(9), a=seq(-1,1,0.25))
    colnames(design)[2] <- names(typ_params$to_optimize)
    param_design <- design
    param_design[,2] <- 
      seq(min(typ_params$to_optimize[[1]]), 
          max(typ_params$to_optimize[[1]]), 
          diff(typ_params$to_optimize[[1]])/8)
  }
  
  param_design <- combineParams(param_design, typ_params$no_optimization)   
  
  design_list <- apply(param_design,1,FUN = function(x){
    as.list(x)
  })
  
  tasks <- seq_len(nrow(design));
  
  if (.Platform$OS.type=="windows"){
    MessageOutput("Your OS is Windows, there might be unexpected errors.\n")
    MessageOutput("If there is some unexpected bugs, please reduce the 'core' as 1.\n")
  }
  
  if (.on.public.web()){
    nSlaves <- 2;
  }
  
  # Parallel or single core
  if(nSlaves > 1) {
    
    if (.on.public.web()){
      nstepby <- 5;
      nstep<-ceiling(length(tasks)/nstepby);
      
    } else {
      # Memory optimization strategy
      ncount <- object@phenoData@data[["sample_name"]];
      
      data.size <- round(as.numeric(object.size(object) / 1024 / 1024), 1)
      
      if (.Platform$OS.type == "unix") {
        memtotal <-
          ceiling(as.numeric(
            system("awk '/MemTotal/ {print $2}' /proc/meminfo", intern = TRUE)
          ) / 1024 / 1024)
      }
      if (.Platform$OS.type == "windows") {
        memtotal <-
          ceiling(as.numeric(gsub(
            "\r", "", gsub(
              "TotalVisibleMemorySize=",
              "",
              system('wmic OS get TotalVisibleMemorySize /Value', intern = TRUE)[3]
            )
          )) / 1024 / 1024)
      }
      if (data.size < 1) {
        data.size <- 0.5
      }
     
      if (memtotal / data.size > 30) {
        nstepby <- ceiling(memtotal * 1.5 / (data.size * 32))
      } else if (memtotal / data.size < 30 &&
                 memtotal / data.size > 15) {
        nstepby <- ceiling(memtotal * 1 / (data.size * 32))
      } else {
        nstepby <- ceiling(memtotal * 0.5 / (data.size * 32))
      }
      
      nstep <- ceiling(length(tasks) / nstepby)
    }
    
    ## Parallel Setting
    #if('snow' %in% rownames(installed.packages())){
    #  unloadNamespace("snow")
    #}
    
    cl_type <- getClusterType()
    cl <- parallel::makeCluster(nSlaves,type = cl_type)
    
    response <- matrix(0, nrow=length(design[[1]]), ncol=9)
    
    parallel::clusterExport(cl, .optimize_function_list, envir = asNamespace("OptiLCMS"))

    
    # Setting progress bar and start the running loop
    pb <- progress_bar$new(format = "DoE Running [:bar] :percent Time left: :eta", total = nstep, clear = TRUE, width= 75)
    
    if (.on.public.web()){
      print_mes <- paste0("Finished: ");    
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE ,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "");
    }
    
    for (w in seq_len(nstep)){
      
      if (.on.public.web()){
        
        count_tmp <- pb$tick();
        totalcount <- formals(count_tmp[["initialize"]])[["total"]];
        currentcount <- environment(count_tmp[["tick"]])[["private"]][["current"]];
        
        write.table((w/nstep*(iterator/4)*3.5+(iterator-1)*3.5+5), file = "log_progress.txt", row.names = FALSE,col.names = FALSE);
        print_mes <- paste(round(w/(nstep+1), digits=2)*100, "%");    
        write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE,row.names = FALSE,col.names = FALSE, quote = FALSE, eol = " | ");
        
      } else {
        pb$tick();
      }
      
      
      value.index<-tasks[c(seq_len(nstepby))+(w-1)*nstepby];
      if (NA %in% tasks[c(seq_len(nstepby))+(w-1)*nstepby]){
        value.index<-value.index[-which(is.na(tasks[c(seq_len(nstepby))+(w-1)*nstepby]))]
      }
      
      response0 <- parallel::parSapply(
        cl = cl,
        X = value.index,
        FUN = SlaveCluster_doe,
        Set_parameters = design_list,
        object = object,
        object_mslevel=object_mslevel,
        isotopeIdentification = isotopeIdentification,
        BPPARAM = BPPARAM,
        USE.NAMES = FALSE,
        singleCore = FALSE
      )
      
      response[value.index,]<-t(response0)
    }
    
    parallel::stopCluster(cl)
  } else {
    message("Processing .",appendLF = FALSE)
    response <-
      sapply(
        X = tasks,
        FUN = SlaveCluster_doe,
        Set_parameters = design_list,
        object = object,
        object_mslevel=object_mslevel,
        isotopeIdentification = isotopeIdentification,
        BPPARAM = BPPARAM,
        singleCore = TRUE
      )
    response <- t(response)
    message(" OK, ",appendLF = FALSE)
  }
  
  colnames(response) <- c("exp", "num_peaks", "notLLOQP", "num_C13", "PPS","CV","RCS","GS","GaussianSI")
  response <- response[order(response[,1]),]
  
  ret <- list()
  ret$params <- typ_params
  ret$design <- design
  ret$response <- response
  return(ret)
  
}

#' @title Analyze DoE Result
#' @description Analyze Design of Experiment (DoE) Result
#' @param object MSnExp object, the trimmed or the original data.
#' @param object_mslevel List, the parsed metabolomics scans produced by PeakPicking_prep.
#' @param isotopeIdentification Character, IsotopeIdentidication method, usually includes 'IPO' and 'CAMERA'.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @param mSet_OPT List, the result produced by 'ExperimentsCluster'.
#' @param subdir Logical, weather to creat a sub-directory (if true) or not (if false).
#' @param plot Logical, weather to plot the Contours plots of the DoE results.
#' @param iterator Numeric, the round number of the DoE.
#' @param index.set List, the indexes set (including PPS, CV, RCS, GS and Gaussian Index) produced by 
#' ExperiemntCluster.
#' @param useNoise Numeric, the noise level removed to evalute the gaussian peak.
#' @noRd
#' @import MSnbase
#' @import parallel
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

Statistic_doe <-function(object, object_mslevel, isotopeIdentification, 
                         BPPARAM = bpparam(), mSet_OPT, subdir = NULL ,plot = FALSE,iterator, 
                         index.set,useNoise) {

  MessageOutput(paste0("Model Parsing..."), "\n", NULL)

  # Prepare parameters for model prediction
  params <- mSet_OPT$params
  resp <- mSet_OPT$response[, "QS"]
  
  # Creat the prediction model and get the predicted best results
  model <- createModel(mSet_OPT$design, params$to_optimize, resp)
  mSet_OPT$model <- model                  
  max_settings <- getMaximumLevels(mSet_OPT$model)
  
  tmp <- max_settings[1,-1] # first row without response
  tmp[is.na(tmp)] <- 1 # if Na (i.e. -1, and 1), show +1
  
  mSet_OPT$max_settings <- max_settings
  xcms_parameters <- 
    as.list(decodeAll(max_settings[-1], params$to_optimize))      
  xcms_parameters <- 
    combineParams(xcms_parameters, params$no_optimization)
  
  if(!is.list(xcms_parameters))
    xcms_parameters <- as.list(xcms_parameters)
  
  # deal with the too narrow peak width issue
  pkmin <- xcms_parameters$min_peakwidth;
  pkmax <- xcms_parameters$max_peakwidth;
  
  if(abs(pkmax - pkmin) < 7.5 & pkmin > 7.5){ # TO ENSURE THE PEAK WIDTH IS OVER 7.5
    xcms_parameters$max_peakwidth <- pkmax + 3.75;
    xcms_parameters$min_peakwidth <- pkmin - 3.75;
  } else if (abs(pkmax - pkmin) < 7.5 & pkmin < 7.5) {
    xcms_parameters$max_peakwidth <- pkmax + 7.5;
  }
  
  # Detect the peak features with the predicted best parameters
  mSet <- suppressMessages(calculateSet_doe(object = object, object_mslevel=object_mslevel, 
                                            Set_parameters = xcms_parameters,
                                            task = 1, BPPARAM = BPPARAM));
  
  if(!is(mSet, "mSet")){ # All params failed - avoid this corner case
    Eset <- list()
    Eset$RCS <- Eset$CV <- Eset$PPS <- 0;
    
    mSet_OPT$QS <- 0;
    mSet_OPT$PPS <- Eset;
    
    mSet_OPT$OptiParams <- xcms_parameters;
    
    if (.on.public.web()){
      print_mes <- paste0("Model Parsing Done !\n");    
      write.table(print_mes,file="metaboanalyst_spec_proc.txt",append = TRUE, row.names = FALSE,col.names = FALSE, quote = FALSE, eol = "\n");
    } else {
      message("Model Parsing Done !\n")
    } 
    
    return(mSet_OPT)
  }
  
  # xset <- mSet[["xcmsSet"]]
  # Calculate the various indexes
  
  #mSet_OPT$xset <- xset
  mSet_OPT$PPS <- calcPPS2(mSet, isotopeIdentification)
  suppressWarnings(mSet_OPT$PPS$CV <-
                     suppressMessages(calcCV(mSet)))
  
  mSet_OPT$PPS$RCS <- suppressMessages(calcRCS_GSValues(mSet)$RCS)
  
  mSet_OPT$PPS$GS <- suppressMessages(calcRCS_GSValues(mSet)$GS)
  
  mSet_OPT$PPS$GaussianSI <-
    calcGaussianS(mSet, object, useNoise = useNoise)
  
  MessageOutput(paste0("Gaussian peak ratio (%): ", round(mSet_OPT$PPS$GaussianSI,3)*100, ""));
  #MessageOutput(paste0("Total peak number is: ", as.numeric(mSet_OPT$PPS[5])));
  
  #save(mSet_OPT, file = paste0("mSet_",Sys.time(),"_780.rda"));
  ## Normalize the CV, RCS, GS, GaussianSI
  normalized.CV<-(mSet_OPT$PPS$CV-min(index.set$CV))/(max(index.set$CV)-min(index.set$CV));
  normalized.RCS<-(mSet_OPT$PPS$RCS-min(index.set$RCS))/(max(index.set$RCS)-min(index.set$RCS));
  normalized.GS<-(mSet_OPT$PPS$GS-min(index.set$GS))/(max(index.set$GS)-min(index.set$GS));
  normalized.GaussianSI<-mSet_OPT$PPS$GaussianSI;
  
  ## Calculate the QS for the best combination in current iterator!
  QCoE<-0.2*(normalized.CV)+0.4*(normalized.GS+normalized.RCS);
  mSet_OPT$QS<-as.numeric(mSet_OPT$PPS[5])*QCoE*normalized.GaussianSI^1.5;
  
  mSet_OPT$OptiParams <- xcms_parameters;
  
  MessageOutput(paste0("Model Parsing Done !\n"), "\n", NULL)

  return(mSet_OPT)
}

#' @title Core Peak Picking Slave Cluster
#' @description Core Peak Picking Slave Cluster
#' @param task Numeric, task order for XCMS paramters table to run the peak picking and alignment.
#' @param Set_parameters Matrix, the parameters combination produced automatically according to 
#' the primary parameters input.
#' @param object object, the trimmed or the original data.
#' @param object_mslevel List, the parsed metabolomics scans produced by PeakPicking_prep.
#' @param isotopeIdentification Character, IsotopeIdentidication method, usually includes 'IPO' and 'CAMERA'.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @noRd
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

SlaveCluster_doe <-function(task, Set_parameters, object, object_mslevel, 
                            isotopeIdentification, BPPARAM = bpparam(), singleCore = FALSE) {

  mSet <-
    calculateSet_doe(
      object = object,
      object_mslevel=object_mslevel,
      Set_parameters = Set_parameters,
      task = task,
      BPPARAM = BPPARAM
    )
  if(singleCore){
    #MessageOutput(paste("Finished", task,"/",length(Set_parameters),"in this round !"), SuppressWeb = TRUE)
    message(".",appendLF = FALSE)
  }
  
  if (!is(mSet, "character")){
    #MessageOutput("Peak Feature Analyzing...", SuppressWeb = TRUE)
    
    #xset <- mSet[["xcmsSet"]]
    
    result <- calcPPS2(mSet, isotopeIdentification)
    result[1] <- task   
    
    tmp_cv<- try(suppressMessages(calcCV(mSet)),silent = TRUE);
    if (is(tmp_cv, "try-error")){
      result[6]<-0;
    } else {
      result[6] <- tmp_cv;
    }
    
    tmp_RCS_GS <- try(suppressMessages(calcRCS_GSValues(mSet)),silent = TRUE);
    
    if (is(tmp_RCS_GS,"try-error")){
      result[8] <- result[7] <- 0;
    } else {
      result[7] <- tmp_RCS_GS$RCS;
      result[8] <- tmp_RCS_GS$GS;
    };
    
    tmp_GaussianSI <- try(calcGaussianS(mSet, object,
                                      useNoise = as.numeric(Set_parameters[[task]]$noise)),
                        silent = TRUE);
    
    if (is(tmp_GaussianSI,"try-error")){
      result[9]<-0;
    } else {
      result[9]<-tmp_GaussianSI
    };
    
    names(result)[c(6,7,8,9)]<-c("CV","RCS","GS","GaussianSI")
    
    #result
    #MessageOutput("Peak Feature Analyzing Done !\n", SuppressWeb = TRUE)
    
  } else{
    result<-c(task,0,0,0,0,0,0,0,0)
  }
  return(result)
  
}

#' @title Cluster of Peak Picking and Alignment
#' @description Cluster of Peak Picking and Alignment
#' @param object MSnExp object, the trimmed or the original data.
#' @param object_mslevel List, the parsed metabolomics scans produced by PeakPicking_prep.
#' @param Set_parameters Matrix, the parameters combination produced automatically according to 
#' the primary parameters input.
#' @param task Numeric, task order for XCMS paramters table to run the peak picking and alignment.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @noRd
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calculateSet_doe <- function(object, object_mslevel, Set_parameters, task = 1,
                             BPPARAM = bpparam()) {
  
  if (length(Set_parameters) < 33){ # deal with the usual processing case
    param <- updateRawSpectraParam(Set_parameters)
  } else { # deal with the optimization case, usaully 44 (33 each set)
    param <- updateRawSpectraParam(Set_parameters[[task]])
  }
  
  mSet <- calculatePPKs(object, object_mslevel, param, BPPARAM = bpparam())
  
  mSet <- calculateGPRT(mSet, param)
  
  return(mSet)
}

#' @title Peak picking Method
#' @description Wrapped Peak picking Method
#' @param object MSnExp object, the trimmed or the original data.
#' @param object_mslevel List, the parsed metabolomics scans produced by PeakPicking_prep.
#' @param param Matrix, the parameters combination produced automatically according to 
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @param msLevel Numeric, to specifiy the msLevel, only 1 permitted for now. 2 will be supported 
#' in the near future.
#' @import MSnbase
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calculatePPKs<-function(object, object_mslevel,param,
                        BPPARAM = bpparam(),msLevel = 1){
  
  if (param$Peak_method == "centWave" | param$Peak_method == "matchedFilter") {
    mSet <- try(PeakPicking_core(object, object_mslevel,
                                 param = param,
                                 msLevel = 1),
                silent = TRUE)
  } else {
    stop("Other peak picking method cannot be supported for now !")
  }
}

#' @title Alignment Method
#' @description Wrapped Peak Alignment Method
#' @param mSet mSet object, the data produced by 'calculatePPKs' function.
#' @param param Matrix, the parameters combination 
#' produced automatically according to 
#' @noRd
#' @import MSnbase
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calculateGPRT<-function (mSet,param){
  
  mSetTmp <- new("mSet");
  mSetTmp@params <- param;
  mSetTmp@params$BlankSub <- FALSE;
  if(!is(mSet, "try-error")){
    mSetTmp@peakpicking <- mSet$msFeatureData;
    mSetTmp@rawInMemory <- mSet$inMemoryData;
  }

  mSetTmp <- try(PerformPeakAlignment(mSetTmp),silent = TRUE);
  
  gc();
  mSetTmp <- try(PerformPeakFiling (mSetTmp),silent = TRUE);
  gc();
  if (is(mSetTmp,"try-error")) {
    mSet<-"Xset_NA";
  } else {
    mSet <- mSetTmp;
  }
  return(mSet)
}

#' @title Calculate PPS method
#' @description Calculate PPS method
#' @param mSet xcmsSet Object, this object is produced by 'calculateSet_doe' 
#' function, and transformed 
#' with as(objec,'xcmsSet') function.
#' @param isotopeIdentification Character, IsotopeIdentidication method, 
#' usually includes 'IPO' and 'CAMERA'.
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calcPPS2 <- function(mSet, isotopeIdentification=c("IPO", "CAMERA")) {
  
  isotopeIdentification <- match.arg(isotopeIdentification);
  
  ret <- vector(mode="numeric", 5); #array(0, dim=c(1,5)) 
  names(ret) <- c("ExpId", "#peaks", "#NonRP", "#RP", "PPS");
  if(is.null(mSet)) {
    return(ret)
  } 
  
  if(nrow(peaks_IPO(mSet)) == 0) {
    return(ret)
  }
  
  peak_source <- peaks_IPO(mSet)[,c("mz", "rt", "sample", "into", "mzmin", 
                                    "mzmax", "rtmin", "rtmax"),drop=FALSE]
  ret[2] <- nrow(peak_source)
  
  if(isotopeIdentification == "IPO")
    iso_mat <- findIsotopes.IPO(mSet)  
  else
    iso_mat <- findIsotopes.CAMERA(mSet)
  
  samples <- unique(peak_source[,"sample"]);
  isotope_abundance = 0.01108;
  
  #calculating low intensity peaks
  for(sample in samples) {
    non_isos_peaks <- peak_source
    
    if(nrow(iso_mat) > 0) {
      non_isos_peaks <- peak_source[-unique(c(iso_mat)),,drop=FALSE] 
    } 
    
    speaks <- non_isos_peaks[non_isos_peaks[,"sample"]==sample,,drop=FALSE]
    intensities <- speaks[,"into"]
    na_int <- is.na(intensities)
    intensities <- intensities[!na_int]
    
    if(length(intensities)>0) {
      tmp <- intensities[order(intensities)]
      int_cutoff <- mean(tmp[seq_len(max(round((length(tmp)/33),0),1))])
      
      masses <- speaks[!na_int, "mz"]
      #floor((masses-2*CH3)/CH2) + 2
      maximum_carbon <- calcMaximumCarbon(masses)
      carbon_probabilty <- maximum_carbon*isotope_abundance
      
      iso_int <- intensities * carbon_probabilty
      
      not_loq_peaks <- sum(iso_int>int_cutoff)
      ret[3] <- ret[3] + not_loq_peaks
    }
  }#end_for_sample    
  
  ret[4] <- length(unique(c(iso_mat)))
  if(ret[3] == 0) {
    ret[5] <- (ret[4]+1)^1.5/(ret[3]+1)  
  } else {    
    ret[5] <- ret[4]^1.5/ret[3]  
  }
  
  return(ret)
  
}

#' @title Calculatre CV method
#' @description Calculatre CV method
#' @param mSet mSet Object, this object is produced by 'calculateSet_doe' function.
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calcCV<-function(mSet){
  
  ncount<-length(mSet@rawInMemory@phenoData[["sample_name"]])
  
  table.data<-creatPeakTable(mSet);
  
  table.data$abundance.mean <- apply(table.data[, 9:(8 + ncount)],1, 
                                     FUN = mean, na.rm = TRUE);
  table.data$abundance.sd <- apply(table.data[, 9:(8 + ncount)],1, 
                                   FUN = sd, na.rm = TRUE);
  table.data$abundance.cv <- 
    (table.data$abundance.sd * 100)/table.data$abundance.mean;
  
  cv.min<-min(table.data$abundance.cv, na.rm = TRUE);
  cv.0.25<-as.numeric(quantile(table.data$abundance.cv, probs = 0.25, na.rm = TRUE));
  cv.med<-median(table.data$abundance.cv, na.rm = TRUE);
  cv.0.75<-as.numeric(quantile(table.data$abundance.cv, probs = 0.75, na.rm = TRUE));
  cv.max<-max(table.data$abundance.cv, na.rm = TRUE);
  cv.score<-1/(0.75*cv.med+0.25*(cv.max-cv.med))
  
  return(cv.score)
}

#' @title Calculatre RCS and GS method
#' @description  Calculatre RCS and GS method
#' @param mSet mSet Object, this object is produced by 'calculateSet_doe' function.
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calcRCS_GSValues<-function(mSet){
  score.ret<-getRGTVValues(mSet)
  return(list(GS=score.ret[["GS"]],RCS=score.ret[["RCS"]]))
}

#' @title Calculatre Gaussian Peak Ratio method
#' @description Calculatre Gaussian Peak Ratio method
#' @param mSet OptiLCMS Object, this object is produced by 'calculateSet_doe' function.
#' @param object MSnExp object, the trimmed or the original data 
#' (Generated by ImportRawMSData function with "inMemory" mode).
#' @param useNoise Numeric, the noise level removed to evalute the gaussian peak.
#' @param BPPARAM MulticoreParam method, used to set the parallel method. Default is bpparam().
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

calcGaussianS <-function(mSet, object, useNoise, BPPARAM = bpparam()){                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

  if (identical(useNoise, numeric(0))) {
    useNoise <- 0
  }
  peakmat <- mSet@peakfilling$msFeatureData$chromPeaks;
  peakmat_set <- split.data.frame(peakmat, peakmat[, "sample"])
  
  object <- mSet@rawInMemory;
  
  ## Protect the environment from overwrite
  newAssayEnvir <- new.env();
  copyEnv(mSet@rawInMemory@assayData, newAssayEnvir);
  object@assayData <- newAssayEnvir;
  
  ## Adjusted RT application
  scan_names <- sort(names(object@assayData));
  adjRTs <- unname(unlist(mSet@peakfilling[["msFeatureData"]][["adjustedRT"]]))
  
  for(s in seq_along(object)){
    scanNM <- scan_names[s];
    object@assayData[[scanNM]]@rt <- adjRTs[s];
  }
  
  ## select different platforms
  if (.Platform$OS.type == "unix") {
    
    BPPARAM = MulticoreParam(2L);

    res <- bplapply(peakmat_set, extFUN, 
                    object = object,
                    useNoise = useNoise,
                    BPPARAM = BPPARAM)
    
    # res <- lapply(peakmat_set, extFUN, object = object,
    #                 useNoise = useNoise)
    # print(res)
    
  }
  if (.Platform$OS.type == "windows") {

    res <- lapply(peakmat_set, extFUN, object = object, 
                  useNoise = useNoise)
    res <- unlist(res)
  }
  
  rm(object);
  rm(newAssayEnvir);
  gc();
  
  return(mean(sapply(res, FUN = function(x) {
    x
  })))
  
}

#' @importFrom stats nls
SSgaussStats <- function(ints){

  startValue <- try(nls.start(y ~ GaussModel(x, mu, sigma, h),
                              data.frame(x = seq_along(ints), y = ints)),
                    silent = TRUE);
  
  fit <- try(nls(y ~ GaussModel(x, mu, sigma, h), 
                 data.frame(x = seq_along(ints), y = ints),
                 start = startValue), 
             silent = TRUE)
  
  return(fit)
}

#' @importFrom stats fitted cor.test
extFUN <- function(z, object, useNoise) {
  
  if (nrow(z) > 150) {
    z <- extractPeakForGaussian(z)
    # z <- z[sort(sample(seq_len(nrow(z)), 150)), ]
  }
  currentSample <- suppressMessages(
    MSnbase::filterRt(MSnbase::filterFile(object, 
                                          z[1, "sample"]), 
                      rt = range(z[, c("rtmin", "rtmax")])))
  
  corr <- unlist(sapply(seq_len(nrow(z)), FUN = function(i) {
    
    corr <- 0.1;
    mzRange <- z[i, c("mzmin", "mzmax")] + c(-0.001, 0.001);
    rtRange <- z[i, c("rtmin", "rtmax")];
    
    ints <- try(suppressWarnings(MSnbase::intensity(
      MSnbase::filterMz(MSnbase::filterRt(currentSample,
                                          rtRange),
                        mzRange))), silent = TRUE)
    
    if(is(ints, "try-error")){
      return(corr);
    }
    
    ints[lengths(ints) == 0] <- 0
    ints <- as.integer(unlist(ints))
    ints <- ints[!is.na(ints)]
    ints <- ints[ints > useNoise]
    if (length(ints)) {
      ints <- ints - min(ints)
      # if (max(ints) > 0)
      #   ints <- ints/max(ints)
      
      fit <- SSgaussStats(ints);
      
      if (is(fit, "try-error")) { # Still error - record error !
        #write.table(paste0(fit[[1]],Sys.time()), file = "error_report.txt",
        #append = TRUE,eol = "\n");
        corr <- 0.1;
      } else {
        if (sum(!is.na(ints - fitted(fit))) > 4 && 
            sum(!is.na(unique(ints))) > 4 && sum(!is.na(unique(fitted(fit)))) > 
            4) {
          
          cor <- NULL;
          
          old <- options();
          on.exit(options(old));
          
          options(show.error.messages = FALSE);
          cor <- try(cor.test(ints, fitted(fit), 
                              method = "pearson", use = "complete"));
          
          
          if (!is.null(cor) && cor$p.value <= 0.05) {
            corr <- cor$estimate
          }
          else if (!is.null(cor) && cor$p.value > 
                   0.05) {
            corr <- cor$estimate * 0.85 # Give a penalty on that!
          }
        }
        else {
          corr <- 0.1
        }
      }
    }
    return(corr)
  }))
  gaussian.peak.ratio <- nrow(z[corr >= 0.9, , drop = FALSE])/nrow(z)
  return(gaussian.peak.ratio)
}


extractPeakForGaussian <- function(z){
  
 oz <- z[,"into"][order(z[,"into"])];
 znum <- nrow(z);
 ozs <- seq(from = 1, 
            to = znum, 
            by = ceiling(znum/150))
 return(z[sort(names(oz[ozs])),])
  
}

#' @title Identify whether results improved or not
#' @description Identify whether results improved or not
#' @param history List, an interal media objects used to save the optimization results of peaks.
#' @noRd
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

resultIncreased_doe <- function(history) {
  
  index = length(history)
  if(history[[index]]$PPS$PPS == 0 & index == 1){
    MessageOutput(paste("Error: No isotopes detected in your 
                        Intensive ROI of your data, Please manually customize the parameter!"), "\n")
    return(NULL)
  }
  
  if(index < 2)
    return(TRUE)
  
  if(history[[index-1]]$QS >= history[[index]]$QS)
    return(FALSE)
  
  return(TRUE)
  
}

#' @title Noise_evaluation based on Kernal density model
#' @description This functions handles the evaluation on the data noise (noise and prefilter parameters) 
#' and the identification on the molecule weights deviation evaluation.
#' @param raw_data MSnExp object, the (trimmed) data in memory produced by 'PerformDataTrimming'.
#' @import MSnbase
#' @import progress
#' @importFrom stats sd
#' @importFrom stats weighted.mean
#' @noRd
#' @references McLean C (2020). Autotuner: Automated parameter 
#' selection for untargeted metabolomics data processing
#' @author Zhiqiang Pang \email{zhiqiang.pang@mail.mcgill.ca} 
#' Jeff Xia \email{jeff.xia@mcgill.ca}
#' Mcgill University
#' License: GNU GPL (>= 2)

Noise_evaluate <- function (raw_data) {
  
  mSet <- list()
  mSet$time <- mSet$intensity <- list()
  mSet$time <- split(MSnbase::rtime(raw_data), fromFile(raw_data))
  mSet$intensity <- split(MSnbase::tic(raw_data), fromFile(raw_data))
  
  signals <- suppressMessages(lapply(mSet$intensity, FUN = function(y) {
    lag <- 15
    threshold <- 2
    influence <- 0.1
    signals <- rep(0, length(y))
    filteredY <- y[seq_len(lag)]
    avgFilter <- NULL
    stdFilter <- NULL
    avgFilter[lag] <- mean(y[seq_len(lag)])
    stdFilter[lag] <- sd(y[seq_len(lag)])
    for (i in (lag + 1):length(y)) {
      if (abs(y[i] - avgFilter[i - 1]) > threshold * stdFilter[i - 
                                                               1]) {
        if (y[i] > avgFilter[i - 1]) {
          signals[i] <- 1
        }
        else {
          signals[i] <- -1
        }
        filteredY[i] <- influence * y[i] + (1 - influence) * 
          filteredY[i - 1]
      }
      else {
        signals[i] <- 0
        filteredY[i] <- y[i]
      }
      avgFilter[i] <- mean(filteredY[(i - lag):i])
      stdFilter[i] <- sd(filteredY[(i - lag):i])
    }
    return(list(signals = signals, avgFilter = avgFilter, 
                stdFilter = stdFilter))
  }))
  factorCol <- "Groups"
  Groups <- basename(raw_data@processingData@files)
  metadata <- data.frame(Sample.Name = basename(fileNames(raw_data)), 
                         Groups = Groups)
  
  sample_names <- paste(unlist(metadata[, factorCol]), seq_len(nrow(metadata)))
  peakList <- list();
  varExpThresh <- 0.8;
  
  for (index in seq_along(sample_names)) {
    peaks <- rle(signals[[index]]$signals)
    counter <- 1
    peakGroups <- list()
    for (rleIndex in seq_along(peaks$lengths)) {
      curValue <- peaks$values[rleIndex]
      if (curValue == 1) {
        peakGroups[[counter]] <- 
          data.frame(index = (startPoint + 1), length = peaks$lengths[rleIndex])
        startPoint <- startPoint + peaks$lengths[rleIndex]
        counter <- counter + 1
      }
      else {
        if (rleIndex == 1) {
          startPoint <- peaks$lengths[rleIndex]
        }
        else {
          startPoint <- startPoint + peaks$lengths[rleIndex]
        }
      }
    }
    peakGroups <- Reduce(peakGroups, f = rbind)
    signals[[index]]$signals[peakGroups$index[peakGroups$length == 
                                                1] + 1] <- 1
    findPeaks <- rle(signals[[index]]$signals == 1)
    counter <- 1
    peakGroups <- list()
    for (rleIndex in seq_along(findPeaks$lengths)) {
      curValue <- findPeaks$values[rleIndex]
      if (curValue) {
        start <- (startPoint + 1)
        startPoint <- startPoint + findPeaks$lengths[rleIndex]
        end <- startPoint
        peakGroups[[counter]] <- data.frame(start, end, 
                                            length = end - start + 1)
        counter <- counter + 1
      }
      else {
        if (rleIndex == 1) {
          startPoint <- findPeaks$lengths[rleIndex]
        }
        else {
          startPoint <- startPoint + findPeaks$lengths[rleIndex]
        }
      }
    }
    peakGroups <- Reduce(rbind, peakGroups)
    npeaks <- 10;
    
    if (nrow(peakGroups) < 10) {
      npeaks <- nrow(peakGroups)
      #warning("You are strongly advised to use wider rt.idx or more 
      #samples or other trimming strategy !")
    }
    peakGroups <- peakGroups[order(peakGroups$length, decreasing = TRUE), 
                             ]
    peak_times <- list()
    for (j in seq_len(nrow(peakGroups))) {
      peak_times[[j]] <- mSet$time[[index]][peakGroups$start[j]:peakGroups$end[j]]
    }
    max_peak_length <- max(vapply(X = peak_times, FUN = length, 
                                  FUN.VALUE = numeric(1)))
    peak_table <- data.frame(matrix(nrow = max_peak_length, 
                                    ncol = npeaks + 1))
    peak_table[, 1] <- seq_len(max_peak_length)
    colnames(peak_table) <- c("peakLenth", paste("peak", 
                                                 seq_len(npeaks)))
    for (column in seq(from = 2, to = ncol(peak_table))) {
      peak <- peak_times[[column - 1]]
      peak_table[c(seq_along(peak)), column] <- peak
    }
    peak_table$peakLenth <- NULL
    peakList[[index]] <- peak_table
  }
  names(peakList) <- sample_names
  peak_table <- data.frame()
  counter <- 1
  for (sampleIndex in seq_along(peakList)) {
    time <- mSet$time[[sampleIndex]]
    intensity <- mSet$intensity[[sampleIndex]]
    peaks <- peakList[[sampleIndex]]
    peakIndexTable <- data.frame()
    for (peakColIndex in seq_len(ncol(peaks))) {
      tempPeakWidthEst <- peakwidth_est(peak_vector = peaks[, 
                                                            peakColIndex], 
                                        time, intensity, start = NULL, 
                                        end = NULL, old_r2 = NULL)
      if (length(time)/5 < diff(tempPeakWidthEst)) {
        next();
        # stop(paste("One peak was over 1/5 of all scans in length.", 
        #            "This is probably an error."))
      }
      if (tempPeakWidthEst[2] > length(time)) {
        tempPeakWidthEst[2] <- length(time)
      }
      peakIndexTable <- rbind(peakIndexTable, c(tempPeakWidthEst, 
                                                peakColIndex))
    }
    colnames(peakIndexTable) <- c("peakStart", "peakEnd", 
                                  "peakID")
    for (curPeakIndexCol in seq_along(peakIndexTable$peakStart)) {
      start <- peakIndexTable$peakStart[curPeakIndexCol]
      end <- peakIndexTable$peakEnd[curPeakIndexCol]
      storeRow <- data.frame(peak_width = time[end] - 
                               time[start], Start_time = time[start], 
                             End_time = time[end], 
                             Start_name = names(time)[start], 
                             End_name = names(time)[end], 
                             Sample = sampleIndex, 
                             Mid_point_time = (time[start] + 
                                                 time[end])/2, 
                             Max_intensity = max(intensity[start:end]), 
                             Maxima_time = time[start + which(intensity[start:end] %in% 
                                                                max(intensity[start:end]))])
      peak_table <- rbind(peak_table, storeRow)
      counter <- counter + 1
    }
  }
  pb <- progress_bar$new(format = "Evaluating [:bar] :percent Time left: :eta", 
                         total = length(unique(peak_table$Sample)), clear = TRUE, 
                         width = 75)
  totalEstimates <- list()
  for (j in unique(peak_table$Sample)) {
    pb$tick()
    currentTable <- peak_table[peak_table$Sample == j, ]
    raw_data_current <- filterFile(raw_data, j)
    header_rt <- unname(mSet[["time"]][[j]])
    header <- MSnbase::header(raw_data_current)
    allMzs <- MSnbase::mz(raw_data_current)
    allInt <- MSnbase::intensity(raw_data_current)
    mzDb <- list()
    for (i in seq_along(allInt)) {
      mzDb[[i]] <- cbind(mz = allMzs[[i]], intensity = allInt[[i]])
    }
    rm(allMzs, allInt)
    pickedParams <- list()
    for (curPeak in seq_len(nrow(currentTable))) {
      start <- currentTable[curPeak, "Start_time"]
      end <- currentTable[curPeak, "End_time"]
      width <- currentTable$peak_width[curPeak]
      observedPeak <- list(start = start, end = end)
      rate <- mean(diff(header_rt[header$ms.level == 1L]))
      scansOfPeak <- which(observedPeak$start < header_rt & 
                             header_rt < observedPeak$end)
      peakHead <- header[scansOfPeak, ]
      ms1 <- peakHead$ms.level == 1L
      rm(peakHead, ms1)
      peakMassSpectras <- mzDb[scansOfPeak]
      for (i in seq_along(scansOfPeak)) {
        peakMassSpectras[[i]] <- cbind(peakMassSpectras[[i]], 
                                       scansOfPeak[i])
      }
      peakMassSpectras <- Reduce(rbind, peakMassSpectras)
      colnames(peakMassSpectras) <- c("mz", "intensity", 
                                      "scan")
      peakMassSpectras <- data.frame(peakMassSpectras)
      peakMassSpectras <- peakMassSpectras[order(peakMassSpectras$mz), ]
      peakMassSpectras <- peakMassSpectras[peakMassSpectras$intensity > 0, ]
      sortedAllEIC <- peakMassSpectras
      matchedMasses <- rle(diff(sortedAllEIC$mz) < 0.005)
      noiseAndPeaks <- filterPeaksfromNoise(matchedMasses)
      no_match <- noiseAndPeaks[[1]]
      truePeaks <- noiseAndPeaks[[2]]
      rm(noiseAndPeaks)
      approvedPeaks <- findTruePeaks(truePeaks, sortedAllEIC);
      if(is.null(approvedPeaks)){(next)()}
      overlappingScans <- sum(approvedPeaks$multipleInScan)
      ppmEst <- try(filterPpmError(approvedPeaks, useGap = TRUE, 
                                   varExpThresh, returnPpmPlots = FALSE, 
                                   plotDir = NULL, observedPeak), 
                    silent = TRUE);
      if(is(ppmEst, "try-error") | is.na(ppmEst)){(next)()}
      ppmObs <- approvedPeaks$meanPPM
      ppmObs <- strsplit(split = ";", x = as.character(ppmObs))
      ppmObs <- lapply(ppmObs, as.numeric)
      noisyBin <- unlist(lapply(ppmObs, function(ppm) {
        any(ppm > ppmEst)
      }))
      approvScorePeaks <- approvedPeaks[!noisyBin, ]
      
      SNest <- try(estimateSNThresh(no_match, sortedAllEIC, 
                                    approvScorePeaks), silent = TRUE);
      
      if(is(SNest,"try-error")){(next)()}
      
      SNest <- suppressWarnings(min(SNest));
      if (is.infinite(SNest)) {
        SNest <- 25
      }
      scanEst <- min(approvScorePeaks$scanCount)
      noiseEst <- min(approvScorePeaks$minIntensity) - 
        1000
      if (noiseEst < 0) {
        noiseEst <- 0
      }
      intensityEst <- min(approvScorePeaks$Intensity)/sqrt(2)
      estimatedPeakParams <- data.frame(ppm = ppmEst, 
                                        noiseThreshold = noiseEst, 
                                        peakCount = nrow(approvedPeaks), 
                                        prefilterI = intensityEst, 
                                        prefilterScan = scanEst, 
                                        TenPercentQuanSN = unname(SNest))
      if (is.null(estimatedPeakParams)) {
        next
      }
      pickedParams[[curPeak]] <- cbind(estimatedPeakParams, 
                                       startTime = start, 
                                       endTime = end, sampleID = j)
    }
    sampleParams <- Reduce(rbind, pickedParams)
    totalEstimates[[j]] <- sampleParams
  }
  
  eicParamEsts <- Reduce(rbind, totalEstimates)
  param <- list()
  param$ppm <- round(weighted.mean(eicParamEsts$ppm, eicParamEsts$peakCount),1)
  param$noise <- min(eicParamEsts$noiseThreshold, na.rm = TRUE)
  param$value_of_prefilter <- min(eicParamEsts$prefilterI, 
                                  na.rm = TRUE)
  param$prefilter <- min(eicParamEsts$prefilterScan, na.rm = TRUE)
  param
}

##### -------======   function kit from package IPO  ====----######

#' @references Gunnar Libiseller et al. 2015. IPO: a tool for automated 
#' optimization of XCMS parameters
#' @references https://github.com/glibiseller/IPO

getClusterType <- function() {
  if( .Platform$OS.type=="unix" ) {
    return("FORK")
  }
  return("PSOCK")
}

peaks_IPO <- function(mSet) {
  peaks_act <-mSet@peakfilling$msFeatureData$chromPeaks;
  if(is.null(peaks_act)){
    peaks_act <-mSet@peakRTcorrection[["chromPeaks"]];
  }
  if (!("sample" %in% colnames(peaks_act))) {
    colnames(peaks_act)[colnames(peaks_act) == ""] <- "sample"
  }
  peaks_act
}

calcMaximumCarbon <- function(masses) {  
  
  carbon = 12.0
  hydrogen  = 1.0078250170
  CH3 = carbon + 3 * hydrogen
  CH2 = carbon + 2 * hydrogen  
  
  maximum_carbon <- floor((masses-2*CH3)/CH2) + 2
  
}    
getMaximumLevels <- function(model) {  
  # dimension of the modeled space
  dimensions <- length(model$coding)
  
  # define grid, to test for maximum
  if(dimensions > 6) {
    testSpace <- seq(-1,1,0.2) # 11 points
  } else { 
    testSpace <- seq(-1,1,0.1) # 21 points
  }
  
  testSize <- 10^6 # maximum number of grid points for one test
  # amount for testing each point in testSpace
  testAmount <- length(testSpace)^dimensions 
  i <- 1
  max <- rep(-1, dimensions+1) # start maximum response + setting
  # if there are more test points (=testAmount), than testSize,
  # then the tests are split and each subset is tested seperately
  while(i < testAmount) {
    testdata <- expand.grid.subset(i:(i+testSize), testSpace, dimensions)
    if(dimensions==1)
      names(testdata) <- names(model$coding)
    max_tmp <- getMaxSettings(testdata, model)
    if(max_tmp[1]>max[1]) # if better solution (i.e. test response)
      max <- max_tmp
    i <- i + testSize + 1
  }
  
  return(max)
  
}
getMaxSettings <- function(testdata, model) {
  
  response <- predict(model, testdata)
  max_response <- max(response)
  # select row(s) corresponding to max
  max_settings <- testdata[response==max_response,,drop=FALSE]
  ret <- max_response
  
  for(i in seq_len(ncol(testdata))) {
    levels <- max_settings[,i] # all settings of variable i
    if(all(c(-1,1) %in% levels)) # if both borders are in maximum settings
      ret <- cbind(ret, NA)
    else
      ret <- cbind(ret,levels[1]) # take first setting
  }
  
  colnames(ret) <- c("response", paste("x", seq_len(ncol(testdata)), sep=""))
  return(ret)
}
expand.grid.subset  <- function(subset, sequence, dimensions) { 
  # generate a list, with sequence for each dimension
  vars <- list()
  for(i in seq_len(dimensions)) {
    vars[[i]] <- sequence
  }
  names(vars) <- paste("x", seq_len(dimensions), sep="")
  
  # number of points in sequence grid
  maximumSubset <- length(sequence)^dimensions 
  # from min(subset)) to min(maximumSubset, max(subset)) OR
  # from maximumSubset to maximumSubset
  subset <- min(maximumSubset,min(subset)):min(maximumSubset, max(subset))
  
  #nv <-  #length(vars) 
  # number of values per variable = length(sequence)
  lims <- sapply(vars,length) 
  stopifnot(length(lims) > 0, # i.e. dimensions > 0
            subset <= prod(lims), # i.e. subset <= maximumSubset
            length(names(vars)) == dimensions) # i.e. dimensions = dimensions
  # empty list of length names(vars)
  res <- structure(vector("list",dimensions), .Names = names(vars))
  
  if (dimensions > 1) {
    for(i in dimensions:2) { # count down to 2: set up grid top down
      # %% = mod, %/% = integer division
      f <- prod(lims[seq_len(i-1)]) # number of grid points up to variable nr. (i-1)
      # repeat each element on grid 1:f
      res[[i]] <- vars[[i]][(subset - 1)%/%f + 1] 
      subset <- (subset - 1)%%f + 1 
    } 
  }
  res[[1]] <- vars[[1]][subset] 
  as.data.frame(res) 
} 

typeCastParams <- function(params) {
  ret_1 <- list()
  ret_2 <- list()  
  ret <- list()
  for(i in  seq_along(params)) {
    factor <- params[[i]]
    if(length(factor) == 2) {
      ret_1[[(length(ret_1)+1)]] <- factor
      names(ret_1)[length(ret_1)] <- names(params)[i]
    } else {
      ret_2[[(length(ret_2)+1)]] <- factor
      names(ret_2)[length(ret_2)] <- names(params)[i]
    }
  }
  ret$to_optimize <- ret_1
  ret$no_optimization <- ret_2
  
  return(ret)
}
getCcdParameter <- function(params) {
  
  lower_bounds <- unlist(lapply(X=params, FUN=function(x) x[1]))
  higher_bounds <- unlist(lapply(X=params, FUN=function(x) x[2]))
  
  steps <- (higher_bounds - lower_bounds)/2
  
  # formula for each parameter, that transformes values from the range
  # to [0, 1]
  x <- paste("x", seq_along(params), " ~ (", c(names(params)), " - ", 
             (lower_bounds + steps), ")/", steps, sep="")
  
  # list with single formulas as entries
  formulae <- list()
  for(i in seq_along(x))
    formulae[[i]] <- as.formula(x[i])  
  
  design <- rsm::ccd(length(params), # number of variables
                     n0 = 1, # number of center points
                     alpha = "face", # position of the ‘star’ points
                     randomize = FALSE, 
                     inscribed = TRUE, # TRUE: axis points are at +/- 1 and the
                     # cube points are at interior positions
                     coding = formulae) # List of coding formulas for the design
  # variables
  return(design)
  
}
combineParams <- function(params_1, params_2) {
  len <- max(unlist(sapply(params_1, length)))
  #num_params <- length(params_1)
  
  p_names <- c(names(params_1), names(params_2))
  matchedFilter <- "fwhm" %in% p_names
  
  for(i in seq_along(params_2)) {
    new_index <- length(params_1) + 1
    fact <- params_2[[i]]
    params_1[[new_index]] <- fact
    if(matchedFilter) {
      if(p_names[new_index] == "sigma" && fact == 0) {
        # update values for sigma if zero
        if("fwhm" %in% names(params_1)) {
          params_1[[new_index]][seq_len(len)] <- params_1$fwhm/2.3548
        } else {
          params_1[[new_index]][seq_len(len)] <- params_2$fwhm/2.3548
        }
      } else if(p_names[new_index] == "mzdiff" && fact == 0) {
        # update values for mzdiff if zero
        if("step" %in% names(params_1)) {
          if("steps"  %in% names(params_1)) {
            params_1[[new_index]][seq_len(len)] <- 0.8-params_1$step*params_1$steps
          } else {
            params_1[[new_index]][seq_len(len)] <- 0.8-params_1$step*params_2$steps
          }
        } else {
          if("steps"  %in% names(params_1)) {
            params_1[[new_index]][seq_len(len)] <- 0.8-params_2$step*params_1$steps
          } else {
            params_1[[new_index]][seq_len(len)] <- 0.8-params_2$step*params_2$steps
          }
        }
      } else {  
        # standard: replicate value
        params_1[[new_index]][seq_len(len)] <- fact
      }
    } else {
      # standard: replicate value
      params_1[[new_index]][seq_len(len)] <- fact
    }
  } 
  names(params_1) <- p_names   
  return(params_1)
  
}
getRGTVValues <- function(mSet, exp_index=1, retcor_penalty=1) {
  
  relative_rt_diff <- c();
  peaks <- mSet@peakfilling$msFeatureData$chromPeaks;
  #fgs <- mSet$FeatureGroupTable
  fgs <- mSet@peakfilling$FeatureGroupTable
  groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)])
  rownames(groups) <- NULL
  groupidx <- fgs$peakidx
  
  if(nrow(groups) > 0) {
    for(i in seq_len(nrow(groups))) {
      feature_rtmed <- groups[i, "rtmed"]
      relative_rt_diff <- 
        c(relative_rt_diff, 
          mean(abs(feature_rtmed - 
                     peaks_IPO(mSet)[groupidx[[i]], "rt"]) / feature_rtmed))
    }
    good_groups <- 
      sum(unlist(lapply(X=groupidx, FUN = function(x, mSet) {
        ifelse(length(unique(peaks_IPO(mSet)[x,"sample"])) == 
                 length(fileNames(mSet@rawInMemory)) & 
                 length(peaks_IPO(mSet)[x,"sample"]) == 
                 length(fileNames(mSet@rawInMemory)), 1, 0)
      }, mSet)))
    bad_groups <- nrow(groups) - good_groups
  } else {
    relative_rt_diff <- 1
    good_groups <- 0
    bad_groups <- 0   
  }
  
  tmp_good_groups <- good_groups + ifelse(bad_groups==0, 1, 0)
  tmp_bad_groups <- bad_groups + ifelse(bad_groups==0, 1, 0)
  
  ARTS <- (mean(relative_rt_diff)) * retcor_penalty
  
  ret <- list(exp_index   = exp_index, 
              good_groups = good_groups, 
              bad_groups  = bad_groups, 
              GS          = tmp_good_groups^2/tmp_bad_groups, 
              RCS         = 1/ARTS)
  
  ret$retcor_done = retcor_penalty        
  
  return(ret)  
}
findIsotopes.IPO <- function(mSet, checkPeakShape=c("none", "borderIntensity", 
                                                    "sinusCurve", "normalDistr")) {
  
  checkPeakShape <- match.arg(checkPeakShape)
  
  iso_mat <- matrix(0, nrow=0, ncol=2)
  if(is.null(mSet)) {
    return(iso_mat)
  }
  
  colnames(iso_mat) <- c("12C", "13C")
  peak_source <- peaks_IPO(mSet)[,c("mz", "rt", "sample", "into", "maxo", "mzmin",
                                    "mzmax", "rtmin", "rtmax"), drop=FALSE]
  
  for(i in seq_len(ncol(peak_source))) {
    peak_source <- peak_source[!is.na(peak_source[,i]),,drop=FALSE]
  }
  
  peak_source <- cbind(seq_len(nrow(peak_source)), peak_source)
  colnames(peak_source)[1] <- "id"  
  
  #carbon = 12.0
  #hydrogen = 1.0078250170
  #CH3 = carbon + 3 * hydrogen
  #CH2 = carbon + 2 * hydrogen
  isotope_mass = 1.0033548
  isotope_abundance = 0.01108
  
  samples <- max(peak_source[,"sample"])
  
  #start_sample
  for(sample in seq_len(samples)) {
    #only looking into peaks from current sample   
    speaks <- peak_source[peak_source[,"sample"]==sample,,drop=FALSE]
    split <- 250
    rawdata <- NULL;
    if(!(checkPeakShape=="none"))
      # rawdata <- loadRaw(xcmsSource(xset@filepaths[sample]))
      stop("Other PeakShapeChecking Method will be supported later !")
    
    if(nrow(speaks)>1) {
      #speaks <- speaks[,-c("sample")]
      speaks <- speaks[order(speaks[,"mz"]),]
      
      while(!is.null(nrow(speaks)) & length(speaks) > 3) {
        part_peaks <- NULL
        #splitting the data into smaller pieces to improve speed    
        if(nrow(speaks) < split) {
          part_peaks <- speaks
        } else {          
          upper_bound <- speaks[split,"mzmax"] + isotope_mass          
          end_point <- sum(speaks[,"mz"] < upper_bound)
          part_peaks <- speaks[seq_len(end_point),,drop=FALSE]
        }
        
        rt <- part_peaks[,"rt"]
        rt_window <- rt * 0.005
        rt_lower <- part_peaks[,"rt"] - rt_window
        rt_upper <- part_peaks[,"rt"] + rt_window
        rt_matrix <-  
          t(matrix(rep(rt, nrow(part_peaks)), ncol=nrow(part_peaks)))
        rt_matrix_bool <- rt_matrix >= rt_lower & rt_matrix <= rt_upper
        
        mz <- part_peaks[,"mz"]
        #isotope_masses - mz_window
        mz_lower <- part_peaks[,"mzmin"] + isotope_mass
        #isotope_masses + mz_window
        mz_upper <- part_peaks[,"mzmax"] + isotope_mass
        mz_matrix <-  
          t(matrix(rep(mz, nrow(part_peaks)), ncol=nrow(part_peaks)))
        mz_matrix_bool <- mz_matrix >= mz_lower & mz_matrix <= mz_upper
        
        rt_mz_matrix_bool <- rt_matrix_bool & mz_matrix_bool
        
        rt_mz_peak_ids <- which(rowSums(rt_mz_matrix_bool)>0)
        calculations <- min(split, nrow(speaks))
        rt_mz_peak_ids <- rt_mz_peak_ids[rt_mz_peak_ids < calculations]
        
        for(i in rt_mz_peak_ids) {
          current <- part_peaks[i, ,drop=FALSE]
          rt_mz_peaks <- part_peaks[rt_mz_matrix_bool[i,],,drop=FALSE]
          rt_difference <- 
            abs(current[,"rt"] - rt_mz_peaks[, "rt"]) / current[,"rt"]
          rt_mz_peaks <- cbind(rt_mz_peaks, rt_difference)
          #test intensity_window
          #floor((current["mz"]-2*CH3)/CH2) + 2
          maximum_carbon <- calcMaximumCarbon(current[,"mz"]) 
          carbon_probabilty <- c(1,maximum_carbon)*isotope_abundance
          iso_intensity <- current[,"into"] * carbon_probabilty
          
          int_bools <- 
            rt_mz_peaks[,"into"] >= iso_intensity[1] & 
            rt_mz_peaks[,"into"] <= iso_intensity[2]
          
          if(sum(int_bools) > 0) {
            int_peaks <- rt_mz_peaks[int_bools,,drop=FALSE]
            boundary_bool <- rep(TRUE, (nrow(int_peaks)+1))
            if(!(checkPeakShape=="none")) {
              if(checkPeakShape=="borderIntensity") {
                boundary_bool <- checkIntensitiesAtRtBoundaries(
                  rawdata, 
                  rbind(current,int_peaks[,-ncol(int_peaks), drop=FALSE]))
              } else {
                if(checkPeakShape=="sinusCurve") {                
                  boundary_bool <- checkSinusDistribution(
                    rawdata, 
                    rbind(current,int_peaks[,-ncol(int_peaks),drop=FALSE]))
                } else {                  
                  boundary_bool <- checkNormalDistribution(
                    rawdata, 
                    rbind(current,int_peaks[,-ncol(int_peaks),drop=FALSE]))
                }
              }
            } #else {
            #boundary_bool <- rep(TRUE, (nrow(int_peaks)+1))
            #}              
            if(boundary_bool[1] & sum(boundary_bool[-1])>0) {                 
              iso_peaks <- int_peaks[boundary_bool[-1],,drop=FALSE]
              iso_id <- 
                iso_peaks[which.min(iso_peaks[,"rt_difference"]), "id"]
              #iso_list[[length(iso_list)+1]] <- c(current[,"id"], iso_id)            
              iso_mat <- rbind(iso_mat, c(current[,"id"], iso_id))                
            }
          }
        }
        speaks <- speaks[-(seq_len(calculations)),]
        
      }#end_while_sample_peaks 
    }
  }
  return(iso_mat)
}
checkIntensitiesAtRtBoundaries <-  function(rawdata, peaks, 
                                            minBoundaryToMaxo=1/3,ppmstep=15) {
  ret <- rep(TRUE, nrow(peaks))
  for(i in seq_len(nrow(peaks))) {
    peak <- peaks[i,]
    for(boundary in c("rtmin", "rtmax")) {
      rtIndex <- which(rawdata$rt==peak[boundary])
      if(length(rtIndex)>0) {
        if(rtIndex==length(rawdata$scanindex)) {
          rtIndices <- c(rawdata$scanindex[rtIndex], length(rawdata$mz))
        } else {
          rtIndices <- rawdata$scanindex[c(rtIndex, rtIndex+1)]
        }
        
        #only relevant mz and intensity values regarding retention time
        mz <- rawdata$mz[(rtIndices[1]+1):rtIndices[2]]
        intensities <- rawdata$intensity[(rtIndices[1]+1):rtIndices[2]]
        
        ppm <- peak[c("mzmin", "mzmax")]*ppmstep/1000000
        mzIntensities <- 
          c(0, intensities[mz>=peak["mzmin"]-ppm[1] & mz<=peak["mzmax"]+ppm[2]])
        maxBoundaryIntensity <- max(mzIntensities)
        ret[i] <- ret[i] & maxBoundaryIntensity<peak["maxo"]*minBoundaryToMaxo
      }
    }
  }
  
  return(ret)
  
}
checkSinusDistribution <- function(rawdata, peaks) {
  ret <- rep(TRUE, nrow(peaks))
  for(i in seq_len(nrow(peaks))) {
    ret[i] <- testSinusDistribution(rawdata, peaks[i,,drop=FALSE])
  }
  
  return(ret)
}
checkNormalDistribution <- function(rawdata, peaks) {
  ret <- rep(TRUE, nrow(peaks))
  for(i in seq_len(nrow(peaks))) {
    ret[i] <- testNormalDistribution(rawdata, peaks[i,,drop=FALSE])
  }
  
  return(ret)
}
getIntensitiesFromRawdata <- function(rawdata, peak) {
  rt <- rawdata$rt >= peak[,"rtmin"] & rawdata$rt <= peak[,"rtmax"]
  
  rtRange <- c(min(which(rt)), max(which(rt))+1)  
  scanIndices <- 
    rawdata$scanindex[rtRange[1]:min(rtRange[2], length(rawdata$scanindex))]
  #  scanIndices <- scanIndices[!is.na(scanIndices)]
  if(rtRange[2]>length(rawdata$scanindex)) {
    scanIndices <- c(scanIndices, length(rawdata$intensity))
  }
  
  if(length(scanIndices) < 3)
    return(FALSE)  
  
  y <- c()
  for(i in seq_len(length(scanIndices)-1)) {
    scanRange <- c(scanIndices[i]+1, scanIndices[i+1])
    mz <- rawdata$mz[scanRange[1]:scanRange[2]]
    y <- 
      c(y, 
        max(0, (rawdata$intensity[scanRange[1]:scanRange[2]][
          mz >= peak[,"mzmin"] & mz <= peak[,"mzmax"]])
        )
      )
  }
  
  y
}

#' @importFrom stats dnorm cor
testNormalDistribution <- function(rawdata, peak) {
  
  y <- getIntensitiesFromRawdata(rawdata, peak)
  if(length(y) < 3) {
    return(FALSE)
  }
  
  if(max(y)==0) {
    return(FALSE)
  }
  
  normY <- (y-min(y))/(max(y)-min(y))
  
  mean=10; 
  sd=3;
  
  seqModel <- seq(-4,4,length=length(normY))*sd + mean
  yModel <- dnorm(seqModel,mean,sd)
  yModel = yModel* (1/max(yModel))
  correlation <- cor(yModel, normY)
  
  correlation > 0.7
}

#' @importFrom stats cor
testSinusDistribution <- function(rawdata, peak) {
  
  y <- getIntensitiesFromRawdata(rawdata, peak)
  if(length(y) < 3) {
    return(FALSE)
  }
  if(max(y)==0) {
    return(FALSE)
  }
  
  normY <- (y-min(y))/(max(y)-min(y))
  sinCurve <- (sin(seq(-pi/2,pi+1.5,length=length(normY))) + 1) / 2
  correlation <- cor(sinCurve, normY)
  
  correlation > 0.7
  
}
findIsotopes.CAMERA <-  function(mSet, ...) {
  
  # iso_mat <- matrix(0, nrow=0, ncol=2)
  # if(is.null(mSet)) {
  #   return(iso_mat)
  # }
  # 
  # ids <- peaks_IPO(mSet)[,"sample", drop=FALSE]
  # ids <- cbind(1:length(ids), ids)
  # 
  # xsets <- split(mSet, unique(peaks_IPO(mSet)[,"sample"]))
  # samples <- unique(peaks_IPO(mSet)[,"sample"])
  # for(sample in samples) {
  #   an <- xsAnnotate(mSet, sample=sample)
  #   isos <- findIsotopes(an, ...)@isoID[,c("mpeak", "isopeak"), drop=FALSE]
  #   #start_id <- ids[ids[,2]==sample,,drop=FALSE][1,1] - 1
  #   iso_mat <- rbind(iso_mat, matrix(ids[ids[,2]==sample,1][isos], ncol=2))
  # }
  # 
  # iso_mat
}

#' @importFrom stats as.formula lm
createModel <- function(design, params, resp) {
  # add response to the design, which gives the data for the model
  design$resp <- resp
  if(length(params) > 1) {
    # create full second order (SO) model
    # use xi in model, instead of parameter names
    formula <- 
      as.formula(paste("resp ~ SO(", 
                       paste("x", seq_along(params), 
                             sep="", collapse=","),
                       ")", sep=""))
    model <- rsm::rsm(formula, data=design) 
  } else {
    # create full second order model with one parameter
    # here: use parameter name in model
    param_name <- names(params)[1]
    formula <- as.formula(paste("resp ~ ", param_name, " + ", 
                                param_name, " ^ 2", sep="")) 
    model <- lm(formula, data=design) 
    model$coding <- list(x1=as.formula(paste(param_name, "~ x1"))) 
    names(model$coding) <- param_name
    #attr(model, "class") <- c("rsm", "lm")
  }
  return(model)  
}
decode <- function(value, bounds) {
  if(is.na(value))
    value <- 1
  x <- (value+1)/2 # from [-1,1] to [0, 1]
  x <- (x*(max(bounds)-min(bounds))) + min(bounds)
  
  return(x)
}
decodeAll <- function(values, params) {
  
  ret <- rep(0, length(params))
  for(i in seq_along(params))
    ret[i] <- decode(values[i], params[[i]])
  
  names(ret) <- names(params)
  
  return(ret)
}
encode <- function(value, bounds) {
  x <- (value - min(bounds)) / (max(bounds) - min(bounds))
  
  return(x*2-1)
}
attachList <- function(params_1, params_2) {
  params <- params_1
  for(factor in params_2)
    params[[length(params)+1]] <- factor
  
  names(params) <- c(names(params_1), names(params_2))
  return(params)
}
checkParams <- function(params, quantitative_parameters,
                        qualitative_parameters, 
                        unsupported_parameters) { 
  if(length(typeCastParams(params)$to_optimize)==0) {
    stop("No parameters for optimization specified; stopping!")  
  }
  
  for(i in seq_along(params)) {
    param <- params[[i]]
    name <- names(params)[i]
    if(name %in% unsupported_parameters) {
      stop(c("The parameter ", name, " is not supported! 
             Please remove from parameters; stopping!"))
    }
    if(name %in% qualitative_parameters) {
      if(length(param) == 0) {
        stop(c("The parameter ", name, " has no value set! Please specify; stopping!"))
      }
      if(length(param) > 1) {
        stop(c("Optimization of parameter ", name, " not supported! 
               Please specify only one value; stopping!"))
      }
    }
    if(name %in% quantitative_parameters) {
      if(length(param) == 0) {
        stop(c("The parameter ", name, " has no value set!
                   Please specify between one and two; stopping!"))
      } 
      if(length(param) > 2) {
        stop(c("Too many values for parameter ", name, " !
                   Please specify only one or two; stopping!"))
      }
      if(!all(diff(param) > 0)) {
        stop(c("Parameter ", name, " has wrong order!",
                   "Please specify in increasing order; stopping!"))
      }
    }
  }
  missing_params <- 
    which(!(c(quantitative_parameters, qualitative_parameters) %in% 
              names(params)))
  if(length(missing_params > 0)) {
    stop(paste("The parameter(s)", 
               paste(c(quantitative_parameters,
                       qualitative_parameters)[missing_params], 
                     collapse=", "), 
               "is/are missing! Please specify; stopping!"))
  }
  
}

#
##### -----------------==========    Bottom of this function kit   ======----------------######


###########--------------- ========= Function Kit from AutoTuner =========-----------------  
#' @references McLean C (2020). Autotuner: Automated parameter 
#' selection for untargeted metabolomics data processing
#' @references https://github.com/crmclean/Autotuner/


filterPeaksfromNoise <- function(matchedMasses) {
  
  ## Initializing variables for subsetting
  if(length(matchedMasses$values) %% 2 == 0) {
    list_length <- length(matchedMasses$values)/2
  } else {
    list_length <- as.integer(length(matchedMasses$values)/2) + 1
  }
  truePeaks <- vector("list", list_length)
  truePeakIndex <- 1
  lengthCounter <- 1
  lastTrue <- matchedMasses$values[1]
  
  ## subsets things that have a second mass w/in user error
  for(rleIndex in seq_along(matchedMasses$values)) {
    
    start <- lengthCounter
    
    if(lastTrue == TRUE) {
      start <- start + 1
    }
    
    end <- lengthCounter + matchedMasses$lengths[rleIndex]  - 1
    
    if(isTRUE(matchedMasses$values[rleIndex])) {
      end <- end + 1
      lastTrue = TRUE
      truePeaks[[truePeakIndex]] <- start:end
      truePeakIndex <- truePeakIndex + 1
    } else {
      lastTrue = FALSE
      if(!exists("no_match")) {
        no_match <- start:end
      } else {
        no_match <- c(no_match, start:end)
      }
      
    }
    lengthCounter <- lengthCounter + matchedMasses$lengths[rleIndex]
  }
  
  if(is.null(truePeaks[[list_length]])) {
    truePeaks <- truePeaks[seq_len(list_length - 1)]
  }
  
  rm(list_length,truePeakIndex,lastTrue,lengthCounter,start,end,rleIndex)
  return(list(no_match, truePeaks))
}
findTruePeaks <- function(truePeaks, sortedAllEIC) {
  #
  
  # initializing storage ----------------------------------------------------
  ppmData <- list()
  counter <- 1
  
  # Checking if bin elements come from adj scans ----------------------------
  for(i in seq_along(truePeaks)) {
    
    pickedPeak <- truePeaks[[i]]
    
    peakData <- sortedAllEIC[pickedPeak,]
    peakData <- peakData[order(peakData$scan),]
    
    # remove features that are duplicated.  -------------------------------
    if(nrow(peakData) == 1) {
      next()
    }
    
    ## checking to make sure features comes from adjacent scans
    scanDiff <- diff(sort(unique(peakData$scan)))
    
    ## checking that binned peaks:
    # are being picked up within consecutive scans
    peakDists <- (length(unique(scanDiff)) == 1 && unique(scanDiff) == 1)
    if(!peakDists) {
      next()
    }
    
    ## checking to see if any binned masses come from the same scan
    countsInScans <- table(peakData$scan)
    moreInAScan <- any(as.vector(countsInScans) > 1)
    
    if(moreInAScan) {
      
      for(w in seq_len(length(unique(peakData$scan)) - 1)) {
        
        curScan <- unique(peakData$scan)[w]
        nextScan <- unique(peakData$scan)[w+1]
        
        ### add condition here for whenever it is the end of the scan
        
        scanStates <- peakData[peakData$scan == curScan,]
        nextStates <- peakData[peakData$scan == nextScan,]
        curObsRows <- peakData$scan == curScan | peakData$scan ==
          nextScan
        
        peakData <- peakData[!curObsRows,]
        
        ## do this if there are two states in first scan
        
        if(w == 1) {
          
          errorNext <- lapply(scanStates$mz, function(x) {
            obsError <- abs(x - nextStates$mz)/x * 10^6
          })
          
          checkMins <- vapply(X = errorNext, FUN = which.min,
                              FUN.VALUE = numeric(1))
          
          initialState <- which.min(vapply(X = seq_along(errorNext),
                                           FUN = function(x) {
                                             errorNext[[x]][checkMins[x]]
                                           }, FUN.VALUE = numeric(1)))
          
          scanStates <- scanStates[initialState,]
        }
        
        nextStateIndex <- which.max(vapply(X = scanStates$mz, 
                                           FUN = function(x) {
          obsError <- abs(x - nextStates$mz)/x * 10^6
          if(any(obsError == 0)) {
            ## corner case - error is 0 and there are no other
            ## options
            if(length(x) == 1) {
              obsError <- 0.001
            } else {
              obsError[obsError == 0] <-
                min(obsError[obsError != 0])/10
            }
            
          }
          
          intensityProb <- nextStates$intensity/
            sum(nextStates$intensity)
          errorInverse <- 1/obsError
          nextStateProb <- errorInverse/sum(errorInverse) *
            intensityProb
          return(max(nextStateProb/sum(nextStateProb)))
          
        }, FUN.VALUE = numeric(1)))
        
        nextStates <- nextStates[nextStateIndex,]
        
        ## store new states
        bestStates <- rbind(scanStates,nextStates)
        peakData <- rbind(bestStates, peakData)
        peakData <- peakData[order(peakData$scan),]
        
      }
      
      multipleInScan <- TRUE
      
    } else {
      
      multipleInScan <- FALSE
    }
    
    obsPPM <- vapply(X = 2:length(peakData$mz), FUN = function(mz) {
      estimatePPM(peakData$mz[(mz - 1)], peakData$mz[mz])
    }, FUN.VALUE = numeric(1))
    
    # storing output -------------------------------------------------------
    ppmData[[counter]] <- data.frame(meanMZ = mean(peakData$mz),
                                     startScan = min(peakData$scan),
                                     endScan = max(peakData$scan),
                                     scanCount = length(peakData$scan),
                                     Intensity = sum(peakData$intensity),
                                     meanIntensity = mean(
                                       peakData$intensity),
                                     intensityDispersion = sd(
                                       peakData$intensity),
                                     minIntensity = min(peakData$intensity),
                                     meanPPM = paste(signif(obsPPM),
                                                     collapse = ";"),
                                     multipleInScan,
                                     stringsAsFactors = FALSE,
                                     index = i)
    
    counter <- 1 + counter
    
  }
  approvedPeaks <- Reduce(rbind, ppmData)
  return(approvedPeaks)
}
estimatePPM <- function(first, second) {
  abs(first-second)/first * 10 ^ 6
}

#' estimateSNThresh
#' @noRd
#' @importFrom stats var

estimateSNThresh <- function(no_match, sortedAllEIC, approvedPeaks) {
  
  noisePeakTable <- sortedAllEIC[no_match,]
  noise_noise <- noisePeakTable$intensity
  scanCount <- sortedAllEIC$scan
  maxScan <- max(scanCount, na.rm = TRUE)
  minScan <- min(scanCount, na.rm = TRUE)
  scanCount <- scanCount[no_match]
  
  ## generating index for subseting of fixed noise obj
  scanIntervals <- list()
  for(peakID in seq_len(nrow(approvedPeaks))) {
    
    peakStart <- approvedPeaks[peakID,"startScan"]
    peakEnd <- approvedPeaks[peakID,"endScan"]
    peakDist <- peakEnd - peakStart
    lowerBound <- peakStart - peakDist * 2
    
    if(lowerBound < minScan) {
      lowerBound <- minScan
    }
    
    upperBound <- peakEnd + peakDist * 2
    if(upperBound > maxScan) {
      upperBound <- maxScan
    }
    
    scanIntervals[[peakID]] <- which(minScan:maxScan %in% c(lowerBound,
                                                            upperBound))
  }
  
  ## calculating all fixed noise values
  scanRange <- (minScan:maxScan)
  fixedNoiseList <- list()
  counter <- 1
  for(scanID in seq_along(scanRange)) {
    
    peakNoise <- noise_noise[scanCount == scanRange[scanID]]
    
    if(length(peakNoise) == 0) {
      next
    }
    
    fixedNoise <- peakNoise[!(peakNoise %in% boxplot.stats(peakNoise)$out)]
    
    fixedNoiseMean <- mean(x = fixedNoise, na.rm = TRUE)
    fixedNoiseVar <-  stats::var(x = fixedNoise, na.rm = TRUE)
    
    ## 2019-07-08: fixed corner case where only one noise element was
    ## found within the bin
    if(is.na(fixedNoiseVar)) {
      fixedNoiseVar <- 0
    }
    
    N <- length(fixedNoise);
    fixedNoiseList[[counter]] <- data.frame(fixedNoiseMean,fixedNoiseVar,N);
    counter <- 1 + counter;
  }
  fixedNoiseList <- Reduce(rbind, fixedNoiseList);
  
  ## calculating the sd and mean for each group
  noiseIntDb <- list()
  counter <- 1
  for(row in seq_along(scanIntervals)) {
    
    if(row == 1) {
      new <- TRUE;
    } else {
      new <- vapply(X = noiseIntDb, FUN = function(noiseDb) {
        if(!all(scanIntervals[[row]] == as.numeric(noiseDb[,c(1,2)]))) {
          return(TRUE)
        } else {
          return(FALSE)
        }
      }, FUN.VALUE = logical(1))
      new <- all(new)
    }
    
    if(new) {
      ## add check to see if cur row has already been looked at
      curRow <- scanIntervals[[row]]
      curStatDb <- fixedNoiseList[curRow[1]:curRow[2],]
      
      ## added this to check for case when row is missing
      ## if a match to the noise peak was not identified
      if(any(is.na(curStatDb$fixedNoiseMean))) {
        curStatDb <- curStatDb[!is.na(curStatDb$fixedNoiseMean),]
      }
      
      eX2 <- sum((curStatDb$fixedNoiseMean^2 +
                    curStatDb$fixedNoiseVar)*curStatDb$N)
      eX2 <- eX2/sum(curStatDb$N)
      groupMean <- mean(curStatDb$fixedNoiseMean)
      groupVar <- eX2 - groupMean^2
      
      if(groupVar < 0) {
        groupVar <- groupVar * -1
      }
      
      groupSd <- suppressWarnings(sqrt(groupVar))
      noiseIntDb[[counter]] <- data.frame(start = curRow[1],
                                          end = curRow[2], groupMean,
                                          groupSd)
      counter <- counter + 1
    }
  }
  
  noiseIntDb <- Reduce(rbind, noiseIntDb);
  noiseIntDb$key <- apply(noiseIntDb[,c(1,2)], 1, paste, collapse = " ");
  rm(curStatDb, eX2, groupSd, groupVar, groupMean,
     curRow, fixedNoiseList)
  
  SN <- list()
  counter <- 1
  for(peakID in seq_along(scanIntervals)) {
    
    scanInt <- paste(scanIntervals[[peakID]], collapse = " ")
    scanStats <- noiseIntDb[noiseIntDb$key == scanInt,]
    
    if(nrow(scanStats) == 0) {
      next()
    }
    
    ## 2019-06-20 - added here to fix bug if noise calc is wrong
    if(is.nan(scanStats$groupSd) | is.na(scanStats$groupSd)) {
      next()
    }
    
    if(is.nan(scanStats$groupMean)) {
      next()
    }
    
    Peak <- approvedPeaks$Intensity[peakID]
    
    if((Peak - scanStats$groupMean) > 3*scanStats$groupSd) {
      
      ## selects as true peak
      SigNoiseThresh <- (Peak - scanStats$groupMean)/scanStats$groupSd
      SN[[counter]] <- SigNoiseThresh
      counter <- counter + 1
      
    } else {
      
      next()
      
    }
  }
  
  if(!exists("SN")) {
    return(NA)
  }
  
  return(unlist(SN))
}

#' filterPpmError
#' @noRd
#' @importFrom stats density kmeans
#' @importFrom entropy KL.empirical
#' @importFrom cluster clusGap
#' @importFrom grDevices boxplot.stats

filterPpmError <- function(approvedPeaks, useGap, varExpThresh,
                           returnPpmPlots, plotDir, observedPeak,
                           filename) {
  
  ppmObs <- approvedPeaks$meanPPM
  ppmObs <- unlist(lapply(strsplit(split = ";", x = as.character(ppmObs)),
                          function(x) {as.numeric(x)}))
  
  ## Only consider top 1/3 peaks because high quality signals alrady selected
  ppmObs <- sort(ppmObs,decreasing = TRUE)[1:ceiling(length(ppmObs)/3)]
  
  ## 2019-06-19
  ## corner case when all error measurements are identical.
  if(diff(range(ppmObs)) < .Machine$double.eps ^ 0.5) {
    stop(c("All calculated ppm values are identical.",
               "Mass variation of data may be higher than the mass threshold value."
    ))
  }
  
  #message("-------- Number of ppm value across bins: ", length(ppmObs))
  #set.seed(161);
  if(length(ppmObs) > 10000) {
    set.seed(1141);
    ppmObs <- ppmObs[sample(x = seq_along(ppmObs), size = 5000)]
  }
  
  if(length(ppmObs) > 750) {
    
    checkPpm <- length(ppmObs)/2
    subsample <- TRUE
    while(subsample) {
      
      origDist <- stats::density(ppmObs, bw = 1)$y
      newDist1 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist2 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist3 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist4 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist5 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist6 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      newDist7 <-  stats::density(sample(ppmObs, checkPpm), bw = 1)$y
      
      klDistance <- list()
      subSamples <- ls()[grep("newDist",ls())]
      for(j in seq_along(subSamples)) {
        klDistance[[j]] <- suppressWarnings(entropy::KL.empirical(
          origDist,get(subSamples[j])))
      }
      klDistance <- unlist(klDistance)
      
      if(any(klDistance >= 0.5)) {
        subsample <- FALSE
      } else {
        checkPpm <- checkPpm/2
      }
      
    }
    
    ppmObs <- sample(ppmObs, checkPpm)
  }
  
  # ppmEsts <- bplapply(c(1:500),
  #                     FUN = ppmEstCore,
  #                     ppmObs = ppmObs,
  #                     useGap = useGap, BPPARAM = MulticoreParam(4))
  # ppmEsts <- unlist(ppmEsts)
  # # ppmEsts <- vapply(c(1:250), 
  # #                      ppmEstCore, 
  # #                      FUN.VALUE = vector(mode = "double", length = 1),
  # #        ppmObs = ppmObs,
  # #        useGap = useGap)
  # 
  # #save(ppmEsts, file = "ppmEsts.rda")
  # ppmEsts <- sort(ppmEsts)[-c(1, 500)] # remove top extreme values
  # ppmEst <- mean(ppmEsts) + sd(ppmEsts)*2 # increase to cover more 
  
  ppmEsts <- vapply(c(1:50),
                    ppmEstCore,
                    FUN.VALUE = vector(mode = "double", length = 1),
                    ppmObs = ppmObs,
                    useGap = useGap,
                    varExpThresh = varExpThresh)
  ppmEsts <- sort(ppmEsts)[-c(1, 50)] # remove top extreme values
  ppmEst <- mean(ppmEsts) + sd(ppmEsts)*2 # increase to cover more
  return(ppmEst)
}


ppmEstCore <- function(xx, ppmObs,useGap, varExpThresh) {
  
  if(length(ppmObs) < 100) {
    #set.seed(xx)
    kmeansPPM <- kmeans(ppmObs, 1)
  # } else if(useGap) {
  #   set.seed(xx)
  #   gapStat <- cluster::clusGap(x = as.matrix(ppmObs),
  #                               FUNcluster = kmeans,
  #                               K.max = 5,
  #                               B = 7,
  #                               verbose = FALSE)
  #   
  #   gapStat <- gapStat$Tab;
  #   gap <- diff(-gapStat[,3]) > 0;
  #   if(any(gap)) {
  #     clusters <- max(which(gap)) + 1;
  #   } else {
  #     clusters <- 1;
  #   }
  #   set.seed(xx)
  #   kmeansPPM <- kmeans(ppmObs, clusters);
  } else {
    ## estimating clustering based on hard coded 80% Vexp threshold
    clustCount <- 1
    varExp <- 0
    while(varExp < varExpThresh && clustCount < length(ppmObs)/2) {
      #set.seed(xx)
      kmeansPPM <- kmeans(ppmObs, clustCount)
      varExp <- kmeansPPM$betweenss/kmeansPPM$totss
      clustCount <- clustCount + 1
    }
  }
  
  ## cluster which contains smallest ppm values
  clusterSize <-sort(table(kmeansPPM$cluster),decreasing = TRUE)
  maxCluster <- names(clusterSize)[1]
  minCluster <- which(kmeansPPM$cluster == maxCluster)
  rm(clusterSize)
  
  x <- ppmObs
  n <- length(x)
  h <- 1
  ## delete this later
  gauss <- function(x) 1/sqrt(2*pi) * exp(-(x^2)/2)
  gaussDKE <- function(a, x) gauss((x - a)/h)/(n * h)
  
  bumps <- vapply(X = ppmObs[minCluster], FUN = gaussDKE, x = x,
                  FUN.VALUE = numeric(length = length(x)))
  wholeKDE <- vapply(X = ppmObs, FUN = gaussDKE,
                     x,
                     FUN.VALUE = numeric(length =
                                           length(x)))
  
  ## calculating this ahead of time to avoid unnecessary downstream
  ## math
  cKdeMean <- sum(rowSums(bumps))/length(minCluster)
  OutlierScore <- rowSums(wholeKDE)/(cKdeMean)
  
  scoreSub <- which(OutlierScore > 1)
  
  ppmEst <- max(ppmObs[scoreSub])
  maxX <- ppmEst
  ppmEst <- ppmEst + sd(ppmObs[scoreSub])*3
  
  return(ppmEst)
}


#' @importFrom stats smooth.spline
peakwidth_est <- function(peak_vector,
                          time,
                          intensity,
                          start = NULL,
                          end = NULL,
                          old_r2 = NULL) {
  
  
  killSwitch <- FALSE
  
  # check to make sure input values come from vector
  if(!is.numeric(peak_vector)) {
    warning(c("A non numeric vector was given to peakwidth_est().",
                  "This is incorrect. Check the function input."))
  }
  
  # updating data values to put into regression
  
  if(!is.null(start)) { # case where we are in second + iteration of algorithm
    
    end <- end + 1
    
  } else { # case where the algorithm is run for the first time
    
    peak_index <- which(time %in% peak_vector)
    if(length(peak_index) == 0) {
      stop("The peak entered here could not be matched to the chromatography data.")
    }
    
    start <- peak_index[1] - 1
    end <- peak_index[length(peak_index)]
    
  }
  
  # terms for lm
  points <- c(start,start-1,start-2,start-3,end,end+1,end+2,end+3)
  intensityObs <- intensity[points]
  modelIndex <- seq_along(intensityObs)
  
  # correcting na formation within intensity observation vector
  if(any(is.na(intensityObs))) {
    
    naObsIndex <- which(is.na(intensityObs))
    modelIndex <- modelIndex[-naObsIndex]
    intensityObs <- intensityObs[-naObsIndex]
    killSwitch <- TRUE
    
  }
  
  # running smoothing spline on the data
  if(sum(!is.na(peak_vector)) > 1) {
    splineOut <- smooth.spline(modelIndex, intensityObs)
    splineObs <- splineOut$fit$coef
    splineIndex <- seq_along(splineObs)
  } else {
    splineObs <- intensityObs
  }
  splineIndex <- seq_along(splineObs)
  
  # running a linear model on the outcome of the spline
  chrom_table <- data.frame(splineIndex, splineObs)
  model <- lm(formula = splineObs ~ splineIndex, data = chrom_table)
  new_r2 <- summary(model)$r.squared
  
  # used for comparison with previous itterations
  if(is.null(old_r2)) {
    old_r2 <- 0
  }
  
  # recursive case - returns previously calculated model fit if improvement
  if((old_r2 > new_r2 | old_r2 > .9) | killSwitch == TRUE) {
    
    # make sure to return numerical index of fit - not the values being compared
    peak_width <- c(peakStart = start-3, peakEnd = end+3)
    return(peak_width)
    
  } else {
    peakwidth_est(peak_vector, time, intensity, start,
                  end, old_r2 = new_r2)
  }
}


##### -----------------==========    Bottom of this function kit   ======----------------######
xia-lab/OptiLCMS documentation built on March 27, 2024, 11:11 a.m.