R/Feature_alignment_requantification.R

Defines functions Peak_selection_FDR peak_decision requantify_features add_isotope_features align_features convert_rawTIMS mzxml_to_list run_msconvert_raw_mzXML maxDensity

Documented in add_isotope_features align_features convert_rawTIMS maxDensity mzxml_to_list peak_decision Peak_selection_FDR requantify_features run_msconvert_raw_mzXML

#' Function to determine at which value a density maximum is reached
#' @param data Numeric vector
#' @details Uses kernel density estimation function from R-package stats removing missing values
#' @return Numeric indicating at which value a density maximum is reached
#' @export
maxDensity <- function(data)
{
  dens <- stats::density(data,na.rm=T)
  return(dens$x[which(dens$y == max(dens$y))])
}

#' Convert thermo raw files to mzXML with centroided ms1 scans using the ProteoWizard tool msConvert
#' @param path_to_raw Path to folder containing raw files which should be converted
#' @details Requires installation of ProteoWizard (http://proteowizard.sourceforge.net/download.html). Pay attention to installation requirements.
#' @return Resulting mzXML files are stored in a sub-directory within specified raw file folder
#' @export
run_msconvert_raw_mzXML <- function(path_to_raw=NULL)
{
  #suppressWarnings(suppressMessages(library(ff,quietly = T)))
  #suppressWarnings(suppressMessages(library(rChoiceDialogs,quietly = T)))

  if(is.null(path_to_raw))path_to_raw <- rChoiceDialogs::rchoose.dir(caption = "Select folder containing raw files")

  raw_files <- list.files(path_to_raw)
  raw_files <- raw_files[which(grepl("\\.raw",raw_files))]

  print("The following raw files were found in the specified folder: ")
  cat(raw_files, sep = '\n')

  ###check which raw files still have to be converted
  mzXMLs_available <- list.files(base::paste(path_to_raw,"/mzXML",sep=""))

  files_to_be_converted <- raw_files[which(base::gsub("\\.raw","",raw_files) %not in% base::gsub("\\.mzXML","",mzXMLs_available))]

  if(length(files_to_be_converted)>0)
  {
    ###get home directory
    home_folder <- Sys.getenv("HOME")
    home_folder <- base::gsub("Documents","AppData/Local/Apps/",home_folder)

    ###find MSConvert folder
    folders <- list.dirs(path = home_folder, full.names = TRUE, recursive = F)
    folders <- folders[which(grepl("ProteoWizard",folders))]
    folders <- folders[length(folders)]

    if(file.exists(base::paste(folders,"\\msconvert.exe",sep="")))
    {
      path_to_msconvert <- base::paste(folders,"\\msconvert.exe",sep="")
    }else
    {
      print("Could not find msconvert.exe. Can be usually found in C:/Users/user_name/AppData/Local/Apps/ProteoWizard Version")
      pb <- tcltk::tkProgressBar("Warning!",min = 0,max = 3,initial = 0,label = "Could not find msConvert.exe.",width = 500)

      counter <- 1
      label <- c("On Windows typically located in C:/Users/user_name/AppData/Local/Apps/ProteoWizard Version",
                 "Please specify location of msConvert.exe")
      while(T)
      {
        Sys.sleep(3)
        tcltk::setTkProgressBar(pb,value = counter,label = label[counter])
        counter <- counter + 1
        if(counter == 4)break
      }
      close(pb)
      path_to_msconvert <- file.choose()
    }

    ###get user folder
    win_user_folder <- path.expand('~')

    ###create temporary folder
    dir.create(base::paste(win_user_folder,"\\temp_msconvert",sep=""),showWarnings = F)
    dir.create(base::paste(win_user_folder,"\\temp_msconvert\\mzXML",sep=""),showWarnings = F)

    ###create temporary config.txt and files.txt
    temp_path <- base::paste(win_user_folder,"\\temp_msconvert",sep="")
    setwd(temp_path)
    ####config
    fileConn<-file("config.txt")
    writeLines(c("mzXML=true",
                 "64=true",
                 "noindex=false",
                 "zlib=true",
                 "filter=\"peakPicking vendor msLevel=1\"",
                 "filter=\"msLevel 1\""), fileConn)
    close(fileConn)
    ####files
    fileConn<-file("files.txt")
    writeLines(base::paste(path_to_raw,"\\",files_to_be_converted,sep=""), fileConn)
    close(fileConn)

    ###prepare arguments for msconvert
    arg <- base::paste("-f ",temp_path,"\\files.txt",
                 " -o ",temp_path,"\\mzXML",
                 " -c ",temp_path,"\\config.txt",sep="")

    ###run msconvert
    system2(path_to_msconvert, args = arg)

    ###move mzXMLs to original raw folder
    from <- temp_path
    to   <- path_to_raw
    path1 <- base::paste0(from,"\\mzXML")
    path2 <- base::paste0(to,"\\mzXML")

    dir.create(path2,showWarnings = F)

    for(f in base::gsub("\\.raw",".mzXML",files_to_be_converted))
    {
      ff::file.move(base::paste(path1,"\\",f,sep=""),path2)
    }

    ###remove temp msconvert folder
    r <- file.remove(c("files.txt","config.txt"))
  }

}

#' Prepare mzXML files for IceR workflow
#' @param path_to_mzXML Path to folder containing mzXML files
#' @param n_cores Numbers of CPU cores which should be used to perform conversion
#' @details Converts MS1 spectra in mzXML files into tables containing m/z, RT and intensity information per ion
#' @return Resulting ion tables are stored in a sub-directory (all_ion_lists) of the mzXML folder as .RData files
#' @export
mzxml_to_list <- function(path_to_mzXML,n_cores=2)
{
  #suppressWarnings(suppressMessages(library(doParallel,quietly = T)))
  i <- 0
  convert <- function(mzXMLfile,path_to_mzXML)
  {
    #suppressWarnings(suppressMessages(library(readMzXmlData,quietly = T)))

    data <- base::paste(path_to_mzXML,"/",mzXMLfile,sep="")
    pb <- tcltk::tkProgressBar(title = "Read mzXML",label=base::paste( round(0/1*100, 0),"% done"), min = 0,max = 1, width = 300)
    ms <- readMzXmlData::readMzXmlFile(data)
    close(pb)

    #get total rowcount
    rowcount <- 0
    for(i in 1:length(ms))#
    {
      if(ms[[i]]$metaData$msLevel == 1)
      {
        rowcount <- rowcount + length(ms[[i]]$spectrum$mass)
      }
    }

    ###now extract data
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    dat <- data.table::as.data.table(matrix(ncol=3,nrow=rowcount))
    colnames(dat) <- c("m.z","RT","Intensity")
    sample <- mzXMLfile
    sample <- base::substr(sample,1,regexpr(".mzXML",sample)-1)
    dat$m.z <- as.numeric(dat$m.z)
    dat$RT <- as.numeric(dat$RT)
    dat$Intensity <- as.numeric(dat$Intensity)
    ind <- 1
    max <- length(ms)
    pb <- tcltk::tkProgressBar(title = "Extract data",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)

    for(i in 1:length(ms))
    {
      if(ms[[i]]$metaData$msLevel == 1)
      {
        start <- ind
        stop <- ind + length(ms[[i]]$spectrum$mass) - 1
        data.table::set(x = dat,i = start:stop,j = (1L),value = ms[[i]]$spectrum$mass)
        data.table::set(x = dat,i = start:stop,j = (2L),value = ms[[i]]$metaData$retentionTime/60)
        data.table::set(x = dat,i = start:stop,j = (3L),value = ms[[i]]$spectrum$intensity)
        ind <- ind + length(ms[[i]]$spectrum$mass)
      }
      tcltk::setTkProgressBar(pb, i, label = base::paste( round(i/max*100, 0),"% done (",i,"/",max,")",sep=""))
    }
    close(pb)
    save(dat,file=base::paste(path_to_mzXML,"/all_ion_lists/",sample,"_all_ions.RData",sep=""))
  }

  ##Step - Extract all ions per ms1 spectra

  mzXMLfiles <- list.files(path_to_mzXML)
  mzXMLfiles <- mzXMLfiles[which(grepl(".mzXML",mzXMLfiles))]

  dir.create(base::paste(path_to_mzXML,"/all_ion_lists",sep=""),showWarnings = F)

  setwd(base::paste(path_to_mzXML,"/all_ion_lists",sep=""))

  ###check if all ion list are already available and only generate those which are required
  available_all_ion_lists <- list.files()[which(grepl("\\.RData",list.files()))]

  missing_all_ion_lists <- mzXMLfiles[which(base::gsub("\\.mzXML","",mzXMLfiles) %not in% base::gsub("_all_ions\\.RData","",available_all_ion_lists))]
  if(length(missing_all_ion_lists)>0)
  {
    mzXMLfiles <- missing_all_ion_lists

    cl <- parallel::makeCluster(n_cores)#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
    doParallel::registerDoParallel(cl)
    res <- foreach::foreach(i=mzXMLfiles) %dopar%
      {
        convert(i,path_to_mzXML)
      }
    parallel::stopCluster(cl)
  }

}

#' Extract MS1-spectra from raw TIMS-ToF Pro data
#' @param path_to_raw Path to folder containing d-files
#' @details Convert MS1 spectra with TIMS information in raw files of Bruker TIMS-ToF Pro Mass Spectrometers into tables containing m/z, RT, inverse ion mobility and intensity information per ion. This process is currently very slow and can take several days even for a small data set. It also requires enough space on the home drive of the PC as well as at least 50 Gb of memory.
#' @return Resulting ion tables are stored in a sub-directory (all_ion_lists) of the raw folder as .RData files
#' @export
convert_rawTIMS <- function(path_to_raw=NULL)
{
  ###extract spectra from raw text
  extract_spectra <- function(path,filename)
  {
    #library(data.table)
    con <- file(base::paste(path,filename,sep=""), "r")
    #read until spectrumList length is given
    while(T)
    {
      line = readLines(con, 1)
      if(grepl("    spectrumList \\(",line))
      {
        break
      }
    }
    #get number of total spectra
    n_spectra <- as.numeric(base::gsub("    spectrumList \\(| spectra):","",line[1]))

    #prepare table summarizing relevant information
    spectra <- base::as.data.frame(matrix(ncol=5,nrow=n_spectra))
    colnames(spectra) <- c("RT","1/KO","mz","int","num_ions")
    spectra$RT <- as.numeric(spectra$RT)
    spectra$`1/KO` <- as.numeric(spectra$`1/KO`)
    spectra$mz <- as.character(spectra$mz)
    spectra$int <- as.character(spectra$int)
    spectra$num_ions <- as.numeric(spectra$num_ions)
    count_spectra <- 0

    #jump to section where individual spectra are starting
    while(T)
    {
      line = readLines(con, 1)
      if(grepl("      spectrum:",line))
      {
        break
      }
    }
    #now read individual spectra
    print(base::paste(base::gsub("\\.txt","",filename),": Prepare TIMS-ToF data",sep=""))
    pb <- utils::txtProgressBar(min = 0,max = n_spectra,style = 3)
    for(i in 1:n_spectra)
    {
      #check if ion data is available
      if(i == 1)
      {
        line = readLines(con, 3)
        ion_count <- as.numeric(base::gsub("        defaultArrayLength: ","",line[3]))
      }else
      {
        line = readLines(con, 4)
        ion_count <- as.numeric(base::gsub("        defaultArrayLength: ","",line[4]))
      }


      if(ion_count > 0)
      {
        #read data for this spectrum
        line = readLines(con, 43)

        #increment number of available spectra with ions
        count_spectra <- count_spectra + 1

        #get RT and 1/K0
        RT <- as.numeric(base::gsub("            cvParam: scan start time, |, second","",line[32]))
        ims <- as.numeric(base::gsub("            cvParam: inverse reduced ion mobility, |, volt-second per square centimeter","",line[33]))

        #get m/z and intensities
        mz <- base::gsub(base::paste("          binary: \\[",ion_count,"\\] ",sep=""),"",line[40])
        int <- base::gsub(base::paste("          binary: \\[",ion_count,"\\] ",sep=""),"",line[43])

        #store data in table
        data.table::set(spectra,as.integer(count_spectra),as.integer(1:5),list(RT,ims,mz,int,ion_count))

      }else
      {
        #jump to the end of this spectrum
        line = readLines(con, 41)
      }
      utils::setTxtProgressBar(pb, i)

    }
    close(pb)

    spectra <- spectra[1:count_spectra,]

    close(con)

    ##prepare final table
    table_store <- base::as.data.frame(matrix(ncol=4,nrow=sum(spectra$num_ions)))
    colnames(table_store) <- c("RT","1/KO","mz","int")
    table_store$RT <- as.numeric(table_store$RT)
    table_store$`1/KO` <- as.numeric(table_store$`1/KO`)
    table_store$mz <- as.numeric(table_store$mz)
    table_store$int <- as.numeric(table_store$int)
    counter <- 1
    print(base::paste(base::gsub("\\.txt","",filename),": Finalize conversion of TIMS-ToF data",sep=""))
    pb <- utils::txtProgressBar(min = 0,max = nrow(spectra),style = 3)
    for(i in 1:nrow(spectra))
    {
      start <- counter
      end <- counter + spectra$num_ions[i] - 1
      mzs <- as.numeric(unlist(strsplit(spectra$mz[i]," ")))
      ints <- as.numeric(unlist(strsplit(spectra$int[i]," ")))
      data.table::set(table_store,as.integer(start:end),as.integer(1:4),list(rep(spectra$RT[i],spectra$num_ions[i]),
                                                                 rep(spectra$`1/KO`[i],spectra$num_ions[i]),
                                                                 mzs,
                                                                 ints))
      counter <- counter + spectra$num_ions[i]

      utils::setTxtProgressBar(pb, i)
    }
    close(pb)
    dir.create(base::paste(path,"all_ion_lists",sep=""),showWarnings = F)
    print(base::paste(base::gsub("\\.txt","",filename),": Store extracted spectra data",sep=""))
    save(table_store,file = base::paste(path,"all_ion_lists/",base::gsub("\\.txt","_all_ions.RData",filename),sep=""))

    rm(spectra,table_store)
    gc()
  }
  '%!in%' <- function(x,y)!('%in%'(x,y))
  '%not in%' <- function(x,y)!('%in%'(x,y))

  #suppressWarnings(suppressMessages(library(ff,quietly = T)))
  #suppressWarnings(suppressMessages(library(rChoiceDialogs,quietly = T)))

  if(is.null(path_to_raw))path_to_raw <- rChoiceDialogs::rchoose.dir(caption = "Select folder containing raw files")

  ###get home directory
  home_folder <- Sys.getenv("HOME")
  home_folder <- base::gsub("Documents","AppData/Local/Apps/",home_folder)

  ###find MSConvert folder
  folders <- list.dirs(path = home_folder, full.names = TRUE, recursive = F)
  folders <- folders[which(grepl("ProteoWizard",folders))]
  folders <- folders[length(folders)]

  raw_files <- list.files(path_to_raw)
  raw_files <- raw_files[which(grepl("\\.d",raw_files))]

  ###check which raw files still have to be converted
  text_available <- list.files(base::paste(path_to_raw,"/all_ion_lists",sep=""))

  files_to_be_converted <- raw_files[which(base::gsub("\\.d","",raw_files) %not in% base::gsub("_all_ions.RData","",text_available))]
  if(length(files_to_be_converted)>0)
  {
    ##find location of msconvert.exe
    if(file.exists(base::paste(folders,"\\msconvert.exe",sep="")))
    {
      path_to_msconvert <- base::paste(folders,"\\msconvert.exe",sep="")
    }else
    {
      print("Could not find msconvert.exe. Can be usually found in C:/Users/user_name/AppData/Local/Apps/ProteoWizard Version")
      pb <- tcltk::tkProgressBar("Warning!",min = 0,max = 3,initial = 0,label = "Could not find msConvert.exe.",width = 500)

      counter <- 1
      label <- c("Typically located in C:/Users/user_name/AppData/Local/Apps/ProteoWizard Version",
                 "Please specify location of msConvert.exe")
      while(T)
      {
        Sys.sleep(3)
        tcltk::setTkProgressBar(pb,value = counter,label = label[counter])
        counter <- counter + 1
        if(counter == 4)break
      }
      close(pb)
      path_to_msconvert <- file.choose()
    }

    ##prepare conversion for each file into raw text
    ###get user folder
    win_user_folder <- path.expand('~')

    ###create temporary folder
    dir.create(base::paste(win_user_folder,"\\temp_msconvert",sep=""),showWarnings = F)
    dir.create(base::paste(win_user_folder,"\\temp_msconvert\\temp",sep=""),showWarnings = F)

    ###create temporary config.txt and files.txt
    temp_path <- base::paste(win_user_folder,"\\temp_msconvert",sep="")
    setwd(temp_path)

    ####config
    fileConn<-file("config.txt")
    writeLines(c("text=true",
                 "64=true",
                 "zlib=true",
                 "filter=\"peakPicking vendor msLevel=1\"",
                 "filter=\"msLevel 1\""), fileConn)
    close(fileConn)

    ###now perform conversion using msconvert and subsequently by R for each file step by step

    for(i in 1:length(files_to_be_converted))
    {
      ####specify files.txt for msconvert containing the path to the current file
      fileConn<-file("files.txt")
      writeLines(base::paste(path_to_raw,"\\",files_to_be_converted[i],sep=""), fileConn)
      close(fileConn)

      ###prepare arguments for msconvert
      arg <- base::paste("-f ",temp_path,"\\files.txt",
                   " -o ",temp_path,"\\temp",
                   " -c ",temp_path,"\\config.txt",sep="")

      ###run msconvert
      if(file.exists(base::paste(temp_path,"\\temp\\",base::gsub("\\.d",".txt",files_to_be_converted[i]),sep="")) == F)#check if file is already converted to raw text
      {#if not use msconvert
        system2(path_to_msconvert, args = arg)
      }else
      {
        print("Converted txt file already available")
      }

      ###when finished, trigger conversion into spectra RData file
      if(file.exists(base::paste(temp_path,"\\temp\\all_ion_lists\\",base::gsub("\\.d","_all_ions.RData",files_to_be_converted[i]),sep="")) == F)#check if file is already converted to raw text
      {#if not use msconvert
        extract_spectra(path = base::paste(temp_path,"\\temp\\",sep=""),filename = base::gsub("\\.d",".txt",files_to_be_converted[i]))
      }else
      {
        print("Spectra were already extracted")
      }

      ###copy final spectra .RData file to destination and remove raw txt file
      from <- temp_path
      to   <- path_to_raw
      path1 <- base::paste0(from,"\\temp\\all_ion_lists")
      path2 <- base::paste0(to,"\\all_ion_lists")

      dir.create(path2,showWarnings = F)

      f <- base::gsub("\\.d","_all_ions.RData",files_to_be_converted[i])
      r <- ff::file.move(base::paste(path1,"\\",f,sep=""),path2)
      #remove raw txt file
      r <- file.remove(base::paste(temp_path,"\\temp\\",base::gsub("\\.d",".txt",files_to_be_converted[i]),sep=""))

    }
  }else
  {
    print("All files are already converted.")
  }
}

#' Perform alignment of pre-determined MS1-features by MaxQuant over proteomics samples either analyzed on an Orbitrap machine (e.g. Q-Exactive or Orbitrap Fusion) or a TIMS-ToF Pro
#' @param path_to_MaxQ_output Path to folder containing MaxQuant outputs (txt folder containing at least allpeptides.txt, evidence.txt, peptides.txt and proteinGroups.txt)
#' @param path_to_output Path to folder where IceR results should be stored
#' @param output_file_names_add IceR result name tag. By default IceR_analysis
#' @param mz_window Numeric value indicating maximal m/z deviation around a center. By default set to NA which indicates that the function determines automatically this parameter based on sd of m/z of identified peptides between samples.
#' @param min_mz_window Numberic value indicating how large the automatically determined m/z-window should be. By default set to 0.001 Da. Only required if m/z alignment window should be automatically determined. If set to NA, no minimal window size will be required.
#' @param RT_window Numeric value indicating maximal RT deviation around a center. By default set to NA which indicates that the function determines automatically this parameter based on sd of RT of identified peptides between samples.
#' @param min_RT_window Numberic value indicating how large the automatically determined RT-window should be. By default set to 1 min. Only required if RT alignment window should be automatically determined. If set to NA, no minimal window size will be required.
#' @param feature_mass_deviation_collapse Numeric value indicating which minimal mass deviation is required to distinguish IceR features. By default set to 0.002 Da. IceR features with overlapping RT-windows and mass differences smaller than the specified value are merged.
#' @param only_unmodified_peptides Boolean value indicating if only unmodified peptide sequences are used for alignment. By default set to F.
#' @param remove_contaminants Boolean value indicating if peptide features labeled as contaminants should be removed. By default set to T.
#' @param sample_list Character vector (raw file names) listing which samples should be aligned. By default all samples occuring in MaxQuant outputs are aligned.
#' @param align_unknown Boolean value indicating if only peptide features or also unsequenced features should be aligend over samples. By default set to F.
#' @param min_num_ions_collapse Numeric value indicating how many unsequenced MaxQuant features have to be at least detected over all samples to result in an IceR feature. Only required if align_unknown is set to T. By default set to 10.
#' @param MassSpec_mode String being either "Orbitrap" or "TIMSToF" specifying by which type of Mass Spectrometer the data was generated. By default it expects Thermo Orbitrap data.
#' @param IM_window Numeric value indicating maximal ion mobility (inverse K0) deviation around a center. By default set to NA which indicates that the function determines automatically this parameter based on sd of ion mobility of identified peptides between samples.
#' @param min_IM_window Numberic value indicating how large the automatically determined ion mobility-window (inverse K0) should be. By default set to 0.002. Only required if ion mobility alignment window should be automatically determined. If set to NA, no minimal window size will be required.
#' @details Performs the first steps of the IceR workflow: 1) Alignment window determination if not specified. 2) Alignment of MaxQuant features into IceR features. 3) Transfer of sequence information between MaxQuant features aligned into IceR features. 4) Extraction, modelling and prediction of RT- and m/z-correction factor per IceR feature and sample.
#' @return Outputs are stored in the sub-directory Temporary_files within specified output folder. MaxQuant allpeptides.txt and evidence.txt are converted to RData files. QC plots of estimated alignment windows as well as of random forest modesl and generalized additive models are stored in a QC_plots.pdf. Relevant QC data is stored in Feature_alignment_QC_data.RData. Aligned IceR features are stored in Features_aligned_merged.txt
#' @export
align_features <- function(path_to_MaxQ_output,path_to_output,align_unknown=F,output_file_names_add="IceR_analysis",mz_window=NA,min_mz_window = 0.001,RT_window=NA,min_RT_window=1,min_num_ions_collapse=10,feature_mass_deviation_collapse=0.002,only_unmodified_peptides=F,sample_list=NA,remove_contaminants=T,MassSpec_mode=c("Orbitrap","TIMSToF"),IM_window=NA,min_IM_window=0.002)
{
  options(warn=-1)

  use_mz_at_max_int_for_correction=F

  #Mass spec Mode can only be Orbitrap or TIMSToF
  MassSpec_mode <- MassSpec_mode[1]

  if(any(is.na(sample_list)))sample_list<-NULL

  if(output_file_names_add != "")output_file_names_add <- base::paste("_",output_file_names_add,sep="")

  setwd(path_to_output)
  dir.create("Temporary_files")

  if(file.exists(base::paste("Temporary_files/Features_aligned_merged",output_file_names_add,".txt",sep="")))
  {
    options(warn=1)
    warning(base::paste("Features_aligned_merged",output_file_names_add,".txt is already available in the Temporary_files folder\n IceR will use this one for subsequent steps. If this is not intended, please remove Features_aligned_merged",output_file_names_add,".txt in Temporary_files folder or the complete folder.",sep=""))
    options(warn=-1)
  }else
  {
    if(file.exists("Temporary_files/allPeptides.RData"))
    {
      load("Temporary_files/allPeptides.RData")
      load("Temporary_files/evidence.RData")

      if(is.null(sample_list))
      {
        sample_list <- sort(unique(allpeptides$Raw.file))
      }else
      {
        #check if all Raw.files are in sample_list and that all samples in sample_list are in Raw.file
        all_samples_in_MaxQ <- sort(unique(allpeptides$Raw.file))

        #Any samples specified for IceR but not available in MaxQ data?
        if(any(sample_list %not in% all_samples_in_MaxQ))
        {
          missing <- which(sample_list %not in% all_samples_in_MaxQ)
          options(warn=1)
          warning(paste0(paste(sample_list[missing],collapse = ",")," is/are specified to be requantified by IceR but missing in MaxQuant results. This/these samples will be skipt by IceR."))
          options(warn=-1)
        }

        #Any samples available in MaxQ but not specified for IceR?
        if(any(all_samples_in_MaxQ %not in% sample_list))
        {
          missing <- which(all_samples_in_MaxQ %not in% sample_list)
          options(warn=1)
          warning(paste0(paste(all_samples_in_MaxQ[missing],collapse = ",")," is/are available in MaxQuant results but not specified to be requantified by IceR. This/these samples will be skipt by IceR."))
          options(warn=-1)
        }
      }

    }else
    {
      setwd(path_to_MaxQ_output)
      options(fftempdir = path_to_MaxQ_output)
      print(paste0(Sys.time()," Read MaxQ results"))
      temp <- utils::read.csv(file = "allPeptides.txt",sep='\t',nrows = 2044,header=T)
      tempclasses = sapply(temp, class)
      if(MassSpec_mode == "Orbitrap")
      {
        tempclasses["Intensity"] = "numeric"
        tempclasses[which(tempclasses == "logical" | tempclasses == "character")] <- "factor"
      }else
      {
        tempclasses["m.z"] = "numeric"
        tempclasses["Mass"] = "numeric"
        tempclasses["Retention.time" ] = "numeric"
        tempclasses["Retention.length"] = "numeric"
        tempclasses["Ion.mobility.index"] = "numeric"
        tempclasses["Ion.mobility.index.length"] = "numeric"
        tempclasses[which(tempclasses == "logical" | tempclasses == "character")] <- "factor"
      }
      allpeptides_save <- ff::read.csv.ffdf(file = "allPeptides.txt",sep='\t',VERBOSE = F,colClasses=tempclasses,next.rows = 100000)##read in data in chunks of 100000 rows

      allpeptides <- base::as.data.frame(allpeptides_save)
      allpeptides <- allpeptides[order(allpeptides$Mass),]

      #allpeptidessave <- allpeptides
      ###free some memory
      rm(allpeptides_save)
      gc()

      if(!is.null(sample_list))
      {
        #check if all Raw.files are in sample_list and that all samples in sample_list are in Raw.file
        all_samples_in_MaxQ <- sort(unique(allpeptides$Raw.file))

        #Any samples specified for IceR but not available in MaxQ data?
        if(any(sample_list %not in% all_samples_in_MaxQ))
        {
          missing <- which(sample_list %not in% all_samples_in_MaxQ)
          options(warn=1)
          warning(paste0(paste(sample_list[missing],collapse = ",")," is/are specified to be requantified by IceR but missing in MaxQuant results. This/these samples will be skipt by IceR."))
          options(warn=-1)
        }

        #Any samples available in MaxQ but not specified for IceR?
        if(any(all_samples_in_MaxQ %not in% sample_list))
        {
          missing <- which(all_samples_in_MaxQ %not in% sample_list)
          options(warn=1)
          warning(paste0(paste(all_samples_in_MaxQ[missing],collapse = ",")," is/are available in MaxQuant results but not specified to be requantified by IceR. This/these samples will be skipt by IceR."))
          options(warn=-1)
        }
      }

      ##unify column names
      #RT
      if(length(which(colnames(allpeptides) == "Retention.Length"))==0) ###possibly upper and lower case problem
      {
        if(length(which(colnames(allpeptides) == "Retention.length"))>0) ###possibly upper and lower case problem
        {
          colnames(allpeptides)[which(colnames(allpeptides) == "Retention.length")] <- "Retention.Length"
        }else
        {
          allpeptides$Retention.Length = 0
        }
      }

      if(MassSpec_mode == "Orbitrap")allpeptides$Retention.Length <- allpeptides$Retention.Length/60 ##in Orbitrap allpeptides.txt the retention length is given in sec instead of min like in e.g. evidence.txt

      #MSMS scan numbers
      if(length(which(colnames(allpeptides) == "MSMS.Scan.Numbers"))==0)
      {
        if(length(which(colnames(allpeptides) == "MS.MS.scan.number"))>0)
        {
          colnames(allpeptides)[which(colnames(allpeptides) == "MS.MS.scan.number")] <- "MSMS.Scan.Numbers"
        }else
        {
          allpeptides$MSMS.Scan.Numbers = 0
        }
      }

      ###Some peptides are not available in allpeptides.txt thus we get this additional information from the evidence.txt file
      evidence <- utils::read.csv(file = "evidence.txt",sep='\t',header=T)
      if(any(colnames(evidence) == "MS.MS.Scan.Number"))colnames(evidence)[which(colnames(evidence) == "MS.MS.Scan.Number")] <- "MSMS.Scan.Numbers"
      if(any(colnames(evidence) == "MS.MS.scan.number"))colnames(evidence)[which(colnames(evidence) == "MS.MS.scan.number")] <- "MSMS.Scan.Numbers"
      if(any(colnames(evidence) == "MSMS.Scan.Numbers"))colnames(evidence)[which(colnames(evidence) == "MSMS.Scan.Numbers")] <- "MSMS.Scan.Numbers"
      if(any(colnames(evidence) == "Retention.length"))colnames(evidence)[which(colnames(evidence) == "Retention.length")] <- "Retention.Length"
      if(any(colnames(evidence) == "Ion.mobility.length"))colnames(evidence)[which(colnames(evidence) == "Ion.mobility.length")] <- "Ion.mobility.index.length"
      if(any(colnames(evidence) == "Number.of.scans"))colnames(evidence)[which(colnames(evidence) == "Number.of.scans")] <- "Number.of.frames"

      print(paste0(Sys.time()," Read MaxQ results finished"))

      if(MassSpec_mode == "Orbitrap")
      {
        add_data <- base::as.data.frame(matrix(ncol=ncol(allpeptides),nrow=nrow(evidence),NA))
        colnames(add_data) <- colnames(allpeptides)
        match_col_names <- match(colnames(allpeptides),colnames(evidence))
        data.table::set(add_data,j=as.integer(which(!is.na(match_col_names))),value = evidence[,match_col_names[which(!is.na(match_col_names))]])

        ####now find rows (peptides + modification + sample) which are not yet present in allpeptides.txt
        temp1 <- base::paste(add_data$Sequence,add_data$Modifications,add_data$Raw.file,sep="_")
        temp2 <- base::paste(allpeptides$Sequence,allpeptides$Modifications,allpeptides$Raw.file,sep="_")
        add_data <- add_data[which(temp1 %not in% temp2),]
        allpeptides <- rbind(allpeptides,add_data)
        rm(temp1,temp2,add_data)
        gc()
      }

      if(MassSpec_mode == "TIMSToF")
      {
        add_data <- base::as.data.frame(matrix(ncol=ncol(allpeptides),nrow=nrow(evidence),NA))
        colnames(add_data) <- colnames(allpeptides)
        match_col_names <- match(colnames(allpeptides),colnames(evidence))
        data.table::set(add_data,j=as.integer(which(!is.na(match_col_names))),value = evidence[,match_col_names[which(!is.na(match_col_names))]])

        ###in TIMS allpeptides.txt column sequence and modification is missing thus take evidence table completeley
        add_data$Sequence <- evidence$Sequence
        add_data$Modifications <- evidence$Modifications
        add_data$Proteins <- evidence$Proteins
        add_data$Score <- evidence$Score

        allpeptides$Sequence <- ""
        allpeptides$Modifications <- ""
        allpeptides$Proteins <- ""
        allpeptides$Score <- NA
        allpeptides$Score <- as.numeric(allpeptides$Score)

        allpeptides <- rbind(allpeptides,add_data)
        rm(add_data)
        gc()

        ###convert Ion.mobility.index into 1/K0
        fit <- stats::lm(evidence$X1.K0~evidence$Ion.mobility.index)
        allpeptides$inverse_K0 <- (allpeptides$Ion.mobility.index*fit$coefficients[2])+fit$coefficients[1]
        allpeptides$inverse_K0_length <- allpeptides$Ion.mobility.index.length/1000

      }

      ###if no specific sample list is defined which should be used, use all raw files
      if(is.null(sample_list))
      {
        sample_list <- sort(unique(allpeptides$Raw.file))
      }

      ###clean allpeptides table to reduce required memory space
      if(MassSpec_mode == "Orbitrap")
      {
        if(any(colnames(allpeptides) == "Resolution"))
        {
          allpeptides <- allpeptides[,c("Raw.file","Charge","m.z","Mass","Uncalibrated.m.z","Resolution","Max.intensity.m.z.0","Retention.time","Retention.Length","MS.MS.IDs","Sequence","Modifications","Proteins","Score","MSMS.Scan.Numbers","Intensity")]
        }else
        {
          allpeptides <- allpeptides[,c("Raw.file","Charge","m.z","Mass","Uncalibrated.m.z","Max.intensity.m.z.0","Retention.time","Retention.Length","MS.MS.IDs","Sequence","Modifications","Proteins","Score","MSMS.Scan.Numbers","Intensity")]
          allpeptides$Resolution <- 0
          print("MS resolution seems to be missing in MaxQ outputs !!!")
        }
      }
      if(MassSpec_mode == "TIMSToF")
      {
        allpeptides <- allpeptides[,c("Raw.file","Charge","m.z","Mass","Retention.time","Retention.Length","Sequence","Modifications","Proteins","Score","MSMS.Scan.Numbers","Intensity","inverse_K0","inverse_K0_length")]
        allpeptides$Resolution <- 0 ###no resolution information available
      }

      if(remove_contaminants == T)exclude <- which(grepl("CON",allpeptides$Proteins) | grepl("REV",allpeptides$Proteins))
      if(remove_contaminants == F)exclude <- which(grepl("REV",allpeptides$Proteins))
      if(length(exclude) > 0)allpeptides <- allpeptides[-exclude,] ###remove potential reverse and contaminant peptides


      ###add calibrated RT to all peptides

      temp_evidence <- evidence[,c("Sequence","Raw.file","Charge","Modifications","Calibrated.retention.time")]
      temp_evidence <- stats::aggregate(temp_evidence$Calibrated.retention.time,list(Sequence=evidence$Sequence,Raw.file=evidence$Raw.file,Charge=evidence$Charge,Modifications=evidence$Modifications),FUN=mean,na.rm=T)
      colnames(temp_evidence)[5] <- "Calibrated.retention.time"
      temp <- dplyr::left_join(allpeptides,temp_evidence,by=c("Sequence"="Sequence","Raw.file"="Raw.file","Charge"="Charge","Modifications"="Modifications"))

      missing_RT_calibration_indices <- which(is.na(temp$Calibrated.retention.time) & temp$Sequence != " " & temp$Sequence != "")
      if(length(missing_RT_calibration_indices)>0)
      {
        for(ind in missing_RT_calibration_indices)
        {
          sub <- temp[which(temp$Sequence == temp$Sequence[ind] & temp$Modifications == temp$Modifications[ind]),]
          temp$Calibrated.retention.time[ind] <- stats::median(sub$Calibrated.retention.time,na.rm=T)
        }
      }

      ###if any peptide feature still doesn?t have a valid calibrated RT --> just use observed RT
      temp$Calibrated.retention.time[which(is.na(temp$Calibrated.retention.time) & temp$Sequence != " " & temp$Sequence != "")] <- temp$Retention.time[which(is.na(temp$Calibrated.retention.time) & temp$Sequence != " " & temp$Sequence != "")]
      allpeptides <- temp

      print(paste0(Sys.time()," Determine deviations of m/z from truth"))

      #determine isotope corrected m/z
      if(MassSpec_mode == "Orbitrap")
      {
        ###calculate deviations between samples
        add <- base::as.data.frame(matrix(nrow=nrow(allpeptides),ncol=3))
        colnames(add) <- c("isotope_at_max_int","isotope_corrected_mz_at_max_int","delta_mz_to_mz_at_max_int")
        add[,1] <- as.numeric(add[,1])
        add[,2] <- as.numeric(add[,2])
        add[,3] <- as.numeric(add[,3])

        isotopes <- matrix(nrow=nrow(allpeptides),ncol=4)
        isotopes[,1] <- allpeptides$m.z
        isotopes[,2] <- ((allpeptides$m.z*allpeptides$Charge)+(1*1.002054))/allpeptides$Charge
        isotopes[,3] <- ((allpeptides$m.z*allpeptides$Charge)+(2*1.002054))/allpeptides$Charge
        isotopes[,4] <- ((allpeptides$m.z*allpeptides$Charge)+(3*1.002054))/allpeptides$Charge

        max <- nrow(allpeptides)
        pb <- tcltk::tkProgressBar(title = "Determine deviations of true m/z from observed m/z",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0
        for(i in 1:nrow(allpeptides))
        {
          if(!is.na(allpeptides$Max.intensity.m.z.0[i]))
          {
            mz_max_int <- allpeptides$Max.intensity.m.z.0[i]
            deltas <- abs(isotopes[i,] - mz_max_int)
            closest_iso <- which(deltas == min(deltas,na.rm=T))-1
            if(closest_iso > 0) ###+1 or +2 or +3 isotope shows highest intensity
            { ##correct back to isotope +0 m/z
              iso_mult <- closest_iso*1.002054
              mz_max_int <- ((mz_max_int*allpeptides$Charge[i])-(iso_mult))/allpeptides$Charge[i]
            }
            delta_mz_to_mz_at_max_int <- mz_max_int - allpeptides$m.z[i]
            data.table::set(add,as.integer(i),as.integer(1:3),value=as.list(c(closest_iso,mz_max_int,delta_mz_to_mz_at_max_int)))
          }

          updatecounter <- updatecounter + 1
          if(updatecounter >= 100)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }
        }
        close(pb)

        allpeptides <- cbind(allpeptides,add)

        ###evidence table

        add <- base::as.data.frame(matrix(nrow=nrow(evidence),ncol=3))
        colnames(add) <- c("isotope_at_max_int","isotope_corrected_mz_at_max_int","delta_mz_to_mz_at_max_int")
        add[,1] <- as.numeric(add[,1])
        add[,2] <- as.numeric(add[,2])
        add[,3] <- as.numeric(add[,3])

        isotopes <- matrix(nrow=nrow(evidence),ncol=4)
        isotopes[,1] <- evidence$m.z
        isotopes[,2] <- ((evidence$m.z*evidence$Charge)+(1*1.002054))/evidence$Charge
        isotopes[,3] <- ((evidence$m.z*evidence$Charge)+(2*1.002054))/evidence$Charge
        isotopes[,4] <- ((evidence$m.z*evidence$Charge)+(3*1.002054))/evidence$Charge

        max <- nrow(evidence)
        pb <- tcltk::tkProgressBar(title = "Determine deviations of true m/z from observed m/z",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0

        for(i in 1:nrow(evidence))
        {
          if(!is.na(evidence$Max.intensity.m.z.0[i]))
          {
            mz_max_int <- evidence$Max.intensity.m.z.0[i]
            deltas <- abs(isotopes[i,] - mz_max_int)
            closest_iso <- which(deltas == min(deltas,na.rm=T))-1
            if(closest_iso > 0) ###+1 or +2 or +3 isotope shows highest intensity
            { ##correct back to isotope +0 m/z
              iso_mult <- closest_iso*1.002054
              mz_max_int <- ((mz_max_int*evidence$Charge[i])-(iso_mult))/evidence$Charge[i]
            }
            delta_mz_to_mz_at_max_int <- mz_max_int - evidence$m.z[i]
            data.table::set(add,as.integer(i),as.integer(1:3),value=as.list(c(closest_iso,mz_max_int,delta_mz_to_mz_at_max_int)))
          }

          updatecounter <- updatecounter + 1
          if(updatecounter >= 100)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }
        }
        close(pb)

        evidence <- cbind(evidence,add)
      }

      ###finally save prepared data
      setwd(path_to_output)

      save(allpeptides,file = "Temporary_files/allPeptides.RData")
      save(evidence,file = "Temporary_files/evidence.RData")
    }

    # check up-front that all samples were measured with comparable gradient lengths
    max_RTs <- stats::aggregate(allpeptides$Retention.time,by=list(Raw.file = allpeptides$Raw.file),max,na.rm=T)
    max_RTs_range <- c(round(min(max_RTs$x,na.rm=T),digits = 0),round(max(max_RTs$x,na.rm=T),digits=0))
    if (max_RTs_range[2] - max_RTs_range[1] > 1) {
      stop(paste0("Gradient lengths between samples differ too much!!! Minimal observed gradient length: ",max_RTs_range[1],", Maximal observed gradient length: ",max_RTs_range[2],". IceR can only reliably process data acquired with same gradient lengths!!!"))
    }

    QC_data <- list() ##here relevant qc data is stored and finally saved as RData which can be used for re-generating plots

    grDevices::pdf(base::paste("Temporary_files/QC_plots",output_file_names_add,".pdf",sep=""))

    RT_calibration <- T
    mz_calibration <- T

    if(MassSpec_mode == "TIMSToF")
    {
      IM_calibration <- T
    }else
    {
      IM_calibration <- F
    }

    if(MassSpec_mode == "Orbitrap")QC_data[["MaxQ_calibrations"]] <- evidence[,c("Retention.time.calibration","Uncalibrated...Calibrated.m.z..Da.")]

    allpeptides <- allpeptides[order(allpeptides$m.z),]
    rownames(allpeptides) <- c(1:nrow(allpeptides))

    if(only_unmodified_peptides == T)
    {
      identified_ions <- subset(allpeptides,Sequence != " " & Sequence != "" & Modifications == "Unmodified")
    }else
    {
      identified_ions <- subset(allpeptides,Sequence != " " & Sequence != "")
    }

    identified_ions <- identified_ions[order(identified_ions$Sequence,identified_ions$Intensity,decreasing = T),]

    identified_ions$Sequence <- as.character(identified_ions$Sequence)
    unique_peptides <- unique(identified_ions$Sequence)

    if(MassSpec_mode == "Orbitrap")
    {
      windows <- base::as.data.frame(matrix(ncol=11,nrow=nrow(identified_ions)))
      colnames(windows) <- c("Peptide","Mod","Charge","mean_m.z","sd_m.z","mean_RT","sd_RT","num_ions","mean_RT_calibrated","sd_RT_calibrated","sd_m.z_uncalibrated")

      windows$Peptide <- as.character(windows$Peptide)
      windows$Mod <- as.character(windows$Mod)
      windows$Charge <- as.numeric(windows$Charge)
      windows$mean_m.z <- as.numeric(windows$mean_m.z)
      windows$sd_m.z <- as.numeric(windows$sd_m.z)
      windows$mean_RT <- as.numeric(windows$mean_RT)
      windows$sd_RT <- as.numeric(windows$sd_RT)
      windows$num_ions <- as.numeric(windows$num_ions)
      windows$mean_RT_calibrated <- as.numeric(windows$mean_RT_calibrated)
      windows$sd_RT_calibrated <- as.numeric(windows$sd_RT_calibrated)
      windows$sd_m.z_uncalibrated <- as.numeric(windows$sd_m.z_uncalibrated)
    }

    if(MassSpec_mode == "TIMSToF")
    {
      windows <- base::as.data.frame(matrix(ncol=13,nrow=nrow(identified_ions)))
      colnames(windows) <- c("Peptide","Mod","Charge","mean_m.z","sd_m.z","mean_RT","sd_RT","num_ions","mean_RT_calibrated","sd_RT_calibrated","sd_m.z_uncalibrated","mean_IM","sd_IM")

      windows$Peptide <- as.character(windows$Peptide)
      windows$Mod <- as.character(windows$Mod)
      windows$Charge <- as.numeric(windows$Charge)
      windows$mean_m.z <- as.numeric(windows$mean_m.z)
      windows$sd_m.z <- as.numeric(windows$sd_m.z)
      windows$mean_RT <- as.numeric(windows$mean_RT)
      windows$sd_RT <- as.numeric(windows$sd_RT)
      windows$num_ions <- as.numeric(windows$num_ions)
      windows$mean_RT_calibrated <- as.numeric(windows$mean_RT_calibrated)
      windows$sd_RT_calibrated <- as.numeric(windows$sd_RT_calibrated)
      windows$sd_m.z_uncalibrated <- as.numeric(windows$sd_m.z_uncalibrated)
      windows$mean_IM <- as.numeric(windows$mean_IM)
      windows$sd_IM <- as.numeric(windows$sd_IM)
    }

    ###remove outlier samples where RT of a peptide is very different from all other samples
    outlier_RT_deviation <- (max(allpeptides$Retention.time,na.rm=T)/100)*5 ###deviation should not be larger than 5 % of the total chromatographic retention length

    print(paste0(Sys.time()," Determine matching parameter windows"))

    max <- length(unique_peptides)
    pb <- tcltk::tkProgressBar(title = "Determine matching parameter windows",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    start_time <- Sys.time()
    updatecounter <- 0
    time_require <- 0
    start <- 1
    max_ind <- nrow(identified_ions)
    count_features <- 0
    for(i in 1:length(unique_peptides))
    {
      ind <- start
      while(T)
      {
        ind = ind + 100
        if(ind < max_ind)
        {
          if(identified_ions[ind,"Sequence"] != unique_peptides[i])
          {
            break
          }
        }else
        {
          ind <- max_ind
          break
        }

      }
      sub <- identified_ions[start:ind,]

      ####reset for next start index
      bins <- (ind-start)/100
      start <- ifelse(bins==1,start,start+((bins-1)*100))

      sub <- sub[which(sub$Sequence == unique_peptides[i]),]
      if(length(which(!is.na(sub$Intensity)))>0)sub<-sub[which(!is.na(sub$Intensity)),]
      sub <- sub[!duplicated(sub$Raw.file),]

      for(c in unique(sub$Charge))
      {
        sub1 <- sub[which(sub$Charge == c),]
        for(m in unique(sub1$Modifications))
        {
          sub1 <- sub[which(sub$Charge == c & sub$Modifications == m),]
          count_features <- count_features + 1

          median_RT <- stats::median(sub1$Retention.time,na.rm=T)
          remove <- which(sub1$Retention.time > median_RT + outlier_RT_deviation)
          if(length(remove)>0)sub1 <- sub1[-remove,]
          ###determine mean m/z at peak maximum and mean RT
          if(MassSpec_mode == "Orbitrap")
          {
            data.table::set(windows,as.integer(count_features),as.integer(c(3:11)),value=as.list(c(c,
                                                                                       mean(sub1$m.z,na.rm=T),
                                                                                       stats::sd(sub1$m.z,na.rm=T),
                                                                                       mean(sub1$Retention.time,na.rm=T),
                                                                                       stats::sd(sub1$Retention.time,na.rm=T),
                                                                                       nrow(sub1),mean(sub1$Calibrated.retention.time,na.rm=T),
                                                                                       stats::sd(sub1$Calibrated.retention.time,na.rm=T),
                                                                                       stats::sd(sub1$isotope_corrected_mz_at_max_int,na.rm=T))))
          }
          if(MassSpec_mode == "TIMSToF")
          {
            data.table::set(windows,as.integer(count_features),as.integer(c(3:13)),value=as.list(c(c,
                                                                                       mean(sub1$m.z,na.rm=T),
                                                                                       stats::sd(sub1$m.z,na.rm=T),
                                                                                       mean(sub1$Retention.time,na.rm=T),
                                                                                       stats::sd(sub1$Retention.time,na.rm=T),
                                                                                       nrow(sub1),mean(sub1$Calibrated.retention.time,na.rm=T),
                                                                                       stats::sd(sub1$Calibrated.retention.time,na.rm=T),
                                                                                       stats::sd(sub1$isotope_corrected_mz_at_max_int,na.rm=T),
                                                                                       mean(sub1$inverse_K0,na.rm=T),
                                                                                       stats::sd(sub1$inverse_K0,na.rm=T))))
          }
          data.table::set(windows,as.integer(count_features),as.integer(1:2),value=as.list(c(unique_peptides[i],m)))
        }

      }

      updatecounter <- updatecounter + 1
      if(updatecounter >= 10)
      {
        time_elapsed <- difftime(Sys.time(),start_time,units="secs")
        time_require <- (time_elapsed/(i/max))*(1-(i/max))
        td <- lubridate::seconds_to_period(time_require)
        time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

        updatecounter <- 0
        tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
      }
    }
    close(pb)

    windows <- windows[1:count_features,]

    if(MassSpec_mode == "Orbitrap")
    {
      #generate some QC plots indicating variation of RT and m/z (and IM) for identified features across samples
      graphics::boxplot(windows$sd_m.z_uncalibrated,main="Standard deviation of peptide features m/z",outline=F,ylab="StDev of m/z [Da]")
      graphics::boxplot(windows$sd_RT,main="Standard deviation of peptide features RT",outline=F,ylab="StDev of RT [min]")

      #return some values to the console
      rt_dev <- median(windows$sd_RT,na.rm=T)
      mz_dev <- median(windows$sd_m.z_uncalibrated,na.rm=T)

      print(paste0("Median mz-deviation for peptides between samples: ",round(mz_dev,digits = 6)," Da"))
      print(paste0("Median RT-deviation for peptides between samples: ",round(rt_dev,digits = 2)," min"))
    } else if (MassSpec_mode == "TIMSToF")
    {
      graphics::boxplot(windows$sd_RT,main="Standard deviation of peptide features RT",outline=F,ylab="StDev of RT [min]")
      graphics::boxplot(windows$sd_IM,main="Standard deviation of peptide features inverse K0",outline=F)
      rt_dev <- median(windows$sd_RT,na.rm=T)
      mz_dev <- 0

      print(paste0("Median RT-deviation for peptides between samples: ",round(rt_dev,digits = 2)," min"))
    }

    options(warn=1)
    if (rt_dev > 1) warning("Median RT-deviation between samples is larger than 1 min which could indicate that chromatography is not stable. IceR might not be able to process the data correctly!!!")
    if (mz_dev > 0.01) warning("Median m/z-deviation between samples is larger than 0.01 Da which could indicate that Mass analyzer is not stable. IceR might not be able to process the data correctly!!!")
    options(warn=-1)

    if(is.na(mz_window))
    {
      ###based on boxplots
      if(!is.na(min_mz_window))
      {
        if(grDevices::boxplot.stats(windows$sd_m.z)$`stats`[5]>min_mz_window)
        {
          borders_m.z <- c(-grDevices::boxplot.stats(windows$sd_m.z)$`stats`[5],grDevices::boxplot.stats(windows$sd_m.z)$`stats`[5]) ###upper whisker
        }else
        {
          borders_m.z <- c(-min_mz_window,min_mz_window)
        }
      }else
      {
        borders_m.z <- c(-grDevices::boxplot.stats(windows$sd_m.z)$`stats`[5],grDevices::boxplot.stats(windows$sd_m.z)$`stats`[5]) ###upper whisker
      }


    }else
    {
      ###used defined parameter
      borders_m.z <- c(-mz_window,mz_window)
    }
    if(is.na(RT_window))
    {
      ###based on boxplots
      if(!is.na(min_RT_window))
      {
        if(grDevices::boxplot.stats(windows$sd_RT)$`stats`[5]>min_RT_window)
        {
          borders_RT <- c(-grDevices::boxplot.stats(windows$sd_RT)$`stats`[5],grDevices::boxplot.stats(windows$sd_RT)$`stats`[5]) ##75% quantil based on boxplots
        }else
        {
          borders_RT <- c(-min_RT_window,min_RT_window)
        }
      }else
      {
        borders_RT <- c(-grDevices::boxplot.stats(windows$sd_RT)$`stats`[5],grDevices::boxplot.stats(windows$sd_RT)$`stats`[5]) ##75% quantil based on boxplots
      }

    }else
    {
      ###use defined parameter
      borders_RT <- c(-RT_window,RT_window)
    }

    if(MassSpec_mode == "TIMSToF")
    {
      if(is.na(IM_window))
      {
        ###based on boxplots
        if(!is.na(min_IM_window))
        {
          if(grDevices::boxplot.stats(windows$sd_IM)$`stats`[5]>min_IM_window)
          {
            borders_IM <- c(-grDevices::boxplot.stats(windows$sd_IM)$`stats`[5],grDevices::boxplot.stats(windows$sd_IM)$`stats`[5]) ##75% quantil based on boxplots
          }else
          {
            borders_IM <- c(-min_IM_window,min_IM_window)
          }
        }else
        {
          borders_IM <- c(-grDevices::boxplot.stats(windows$sd_IM)$`stats`[5],grDevices::boxplot.stats(windows$sd_IM)$`stats`[5]) ##75% quantil based on boxplots
        }

      }else
      {
        ###use defined parameter
        borders_IM <- c(-IM_window,IM_window)
      }
    }

    borders_RT_use_save = borders_RT

    if(MassSpec_mode == "Orbitrap")QC_data[["Feature_alignment_windows"]] <- list(RT_window=borders_RT,mz_window=borders_m.z)
    if(MassSpec_mode == "TIMSToF")QC_data[["Feature_alignment_windows"]] <- list(RT_window=borders_RT,mz_window=borders_m.z,IM_window=borders_IM)

    ###determine for how many windows we expect an overlap of ions for different peptide sequences based on the chosen parameters
    windows <- windows[order(windows$mean_m.z),]
    max_i <- nrow(windows)
    windows$overlap <- 0

    max <- nrow(windows)
    pb <- tcltk::tkProgressBar(title = "Determine overlapping peptide features",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    start_time <- Sys.time()
    updatecounter <- 0
    time_require <- 0
    if(MassSpec_mode == "Orbitrap")
    {
      for(i in 1:nrow(windows))
      {
        start <- ifelse((i - 5) < 1,1,(i - 5))
        end <- ifelse((i + 5) > max_i,max_i,(i + 5))
        sub <- windows[start:end,]
        sub <- sub[which(sub$mean_m.z >= windows$mean_m.z[i] + borders_m.z[1] & sub$mean_m.z <= windows$mean_m.z[i] + borders_m.z[2] & sub$mean_RT >= windows$mean_RT[i] + borders_RT[1] & sub$mean_RT <= windows$mean_RT[i] + borders_RT[2] & sub$Charge == windows$Charge[i]),]

        data.table::set(windows,as.integer(i),12L,value=(nrow(sub)-1))

        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }
    if(MassSpec_mode == "TIMSToF")
    {
      for(i in 1:nrow(windows))
      {
        start <- ifelse((i - 5) < 1,1,(i - 5))
        end <- ifelse((i + 5) > max_i,max_i,(i + 5))
        sub <- windows[start:end,]
        sub <- sub[which(sub$mean_m.z >= windows$mean_m.z[i] + borders_m.z[1] & sub$mean_m.z <= windows$mean_m.z[i] + borders_m.z[2] &
                           sub$mean_RT >= windows$mean_RT[i] + borders_RT[1] & sub$mean_RT <= windows$mean_RT[i] + borders_RT[2] &
                           sub$mean_IM >= windows$mean_IM[i] + borders_IM[1] & sub$mean_IM <= windows$mean_IM[i] + borders_IM[2] &
                           sub$Charge == windows$Charge[i]),]

        data.table::set(windows,as.integer(i),12L,value=(nrow(sub)-1))

        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }
    close(pb)

    p <- Barplots(plyr::count(windows$overlap)[,2],main="Number of expected feature overlaps",ylab="Count",Name = plyr::count(windows$overlap)[,1],xlab = "Overlaps",AvgLine = F)
    QC_data[["Alignment_deviations_overlap"]] <- windows

    #####Perform alignment of peptide features

    allpeptides <- allpeptides[order(allpeptides$m.z),]
    rownames(allpeptides) <- c(1:nrow(allpeptides))
    allpeptides$Sequence <- as.character(allpeptides$Sequence)
    allpeptides$Modifications <- as.character(allpeptides$Modifications)
    allpeptides$Proteins <- base::gsub(";","|",allpeptides$Proteins)
    allpeptides$Raw.file <- as.character(allpeptides$Raw.file)
    ###presubset all ions based on charge, here use only rows of allions which are coming from samples which should be also used
    ####also create a library of ions per charge state per mz_window of 0.5 Da
    print(paste0(Sys.time()," Prepare for peptide feature matching - Indexing all ions"))
    rownames(allpeptides) <- c(1:nrow(allpeptides))
    allpeptides_frag <- list()
    allpeptides_frag_indices_per_mz_window <- list()

    for(i in 1:max(allpeptides$Charge))
    {
      if(length(which(allpeptides$Charge == i))>0)
      {
        allpeptides_frag[[i]] <- allpeptides[which(allpeptides$Charge == i),]
        min_max_mz <- c(floor(min(allpeptides_frag[[i]]$m.z)),ceiling(max(allpeptides_frag[[i]]$m.z)))
        indices <- matrix(ncol=4,nrow=min_max_mz[2]-min_max_mz[1])
        indices[,1] <- min_max_mz[1]:(min_max_mz[2]-1)
        indices[,2] <- (min_max_mz[1]+1):(min_max_mz[2])
        for(j in 1:nrow(indices))
        {
          indx <- which(allpeptides_frag[[i]]$m.z >= indices[j,1] & allpeptides_frag[[i]]$m.z <= indices[j,2])
          if(length(indx) > 0)
          {
            indices[j,3:4] <- c(min(indx),max(indx))
          }else
          {
            indices[j,3:4] <- c(NA,NA)
          }

        }
        allpeptides_frag_indices_per_mz_window[[i]] <- indices
      }else
      {
        allpeptides_frag[[i]] <- NA
        allpeptides_frag_indices_per_mz_window[[i]] <- NA
      }

    }
    rm(indices)
    gc()

    temp_data <- base::as.data.frame(matrix(ncol=1,nrow=nrow(allpeptides))) ###here already used ions will be marked
    temp_data[,1] <- as.numeric(temp_data[,1])

    temp <- subset(allpeptides,Sequence != " " & Sequence != "")

    if(MassSpec_mode == "Orbitrap")
    {
      features <- base::as.data.frame(matrix(ncol=21,nrow=length(unique(base::paste(temp$Sequence,temp$Charge,temp$Modifications)))))
      colnames(features) <- c("Feature_name","m.z","RT","Sequence","Protein","MSMS.Scan.Numbers","m.z_range_min","m.z_range_max","RT_range_min","RT_range_max","ion_indices","count_ion_indices","Mass","Charge","mean_Scores","num_matches","Modifications","RT_length","Observed_RT","Observed_mz","Observed_score")

      features <- data.table::as.data.table(features)
      features$Feature_name <- as.character(features$Feature_name)
      features$Mass <- as.numeric(features$Mass)
      features$RT <- as.numeric(features$RT)
      features$Sequence <- as.character(features$Sequence)
      features$Protein <- as.character(features$Protein)
      features$MSMS.Scan.Numbers <- as.character(features$MSMS.Scan.Numbers)
      features$m.z_range_min <- as.numeric(features$m.z_range_min)
      features$m.z_range_max <- as.numeric(features$m.z_range_max)
      features$RT_range_min <- as.numeric(features$RT_range_min)
      features$RT_range_max <- as.numeric(features$RT_range_max)
      features$ion_indices <- as.character(features$ion_indices)
      features$count_ion_indices <- as.numeric(features$count_ion_indices)
      features$m.z <- as.numeric(features$m.z)
      features$Charge <- as.numeric(features$Charge)
      features$mean_Scores <- as.character(features$mean_Scores)
      features$num_matches <- as.character(features$num_matches)
      features$Modifications <- as.character(features$Modifications)
      features$RT_length <- as.numeric(features$RT_length)
      features$Observed_RT <- as.character(features$Observed_RT)
      features$Observed_mz <- as.character(features$Observed_mz)
      features$Observed_score <- as.character(features$Observed_score)
    }
    if(MassSpec_mode == "TIMSToF")
    {
      features <- base::as.data.frame(matrix(ncol=26,nrow=length(unique(base::paste(temp$Sequence,temp$Charge,temp$Modifications)))))
      colnames(features) <- c("Feature_name","m.z","RT","Sequence","Protein","MSMS.Scan.Numbers","m.z_range_min","m.z_range_max","RT_range_min","RT_range_max","ion_indices","count_ion_indices","Mass","Charge","mean_Scores","num_matches","Modifications","RT_length","Observed_RT","Observed_mz","Observed_score","Inv_K0","Inv_K0_length","Inv_K0_range_min","Inv_K0_range_max","Observed_IM")

      features <- data.table::as.data.table(features)
      features$Feature_name <- as.character(features$Feature_name)
      features$Mass <- as.numeric(features$Mass)
      features$RT <- as.numeric(features$RT)
      features$Sequence <- as.character(features$Sequence)
      features$Protein <- as.character(features$Protein)
      features$MSMS.Scan.Numbers <- as.character(features$MSMS.Scan.Numbers)
      features$m.z_range_min <- as.numeric(features$m.z_range_min)
      features$m.z_range_max <- as.numeric(features$m.z_range_max)
      features$RT_range_min <- as.numeric(features$RT_range_min)
      features$RT_range_max <- as.numeric(features$RT_range_max)
      features$ion_indices <- as.character(features$ion_indices)
      features$count_ion_indices <- as.numeric(features$count_ion_indices)
      features$m.z <- as.numeric(features$m.z)
      features$Charge <- as.numeric(features$Charge)
      features$mean_Scores <- as.character(features$mean_Scores)
      features$num_matches <- as.character(features$num_matches)
      features$Modifications <- as.character(features$Modifications)
      features$RT_length <- as.numeric(features$RT_length)
      features$Observed_RT <- as.character(features$Observed_RT)
      features$Observed_mz <- as.character(features$Observed_mz)
      features$Observed_score <- as.character(features$Observed_score)
      features$Inv_K0 <- as.numeric(features$Inv_K0)
      features$Inv_K0_length <- as.numeric(features$Inv_K0_length)
      features$Inv_K0_range_min <- as.numeric(features$Inv_K0_range_min)
      features$Inv_K0_range_max <- as.numeric(features$Inv_K0_range_max)
      features$Observed_IM <- as.character(features$Observed_IM)
    }

    ###get ordering of allpeptides based on sequence
    order_by_Sequence <- order(allpeptides$Sequence,decreasing = T)
    unique_peptides <- sort(unique_peptides,decreasing = T)
    number_of_rows_per_peptide_sequence <- plyr::count(allpeptides$Sequence[which(allpeptides$Sequence != " " & allpeptides$Sequence != "")])
    number_of_rows_per_peptide_sequence <- number_of_rows_per_peptide_sequence[order(number_of_rows_per_peptide_sequence$x,decreasing = T),]

    max <- length(unique_peptides)
    pb <- tcltk::tkProgressBar(title = "Matching known features",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    start_time <- Sys.time()
    updatecounter <- 0
    time_require <- 0

    last_index <- 1 ###required for subsetting allpeptides per peptide sequence
    count_features <- 0

    if(MassSpec_mode == "Orbitrap")
    {
      for(i in 1:length(unique_peptides))#
      {
        sub_peptide <- allpeptides[order_by_Sequence[last_index:(last_index-1+number_of_rows_per_peptide_sequence[i,2])],]
        last_index <- last_index + number_of_rows_per_peptide_sequence[i,2]
        sub_peptide <- sub_peptide[which(sub_peptide$Sequence == unique_peptides[i]),]
        tryCatch({
          for(c in unique(sub_peptide$Charge))
          {
            for(m in unique(sub_peptide$Modifications))
            {
              sub <- sub_peptide[which(sub_peptide$Charge == c & sub_peptide$Modifications == m),]
              if(nrow(sub) > 0)
              {
                sub <- sub[order(sub$Intensity,decreasing = T),]

                ###only use features close to median feature over all samples for this peptide
                median_m.z <- stats::median(sub$m.z,na.rm=T)##
                median_RT <- stats::median(sub$Retention.time,na.rm=T)
                selection <- which(abs(sub$Retention.time-median_RT) < borders_RT[2] & abs(sub$m.z-median_m.z) < borders_m.z[2])
                sub <- sub[selection,]

                if(nrow(sub) > 0)
                {
                  ###check if peptide was sequenced several times. If this is the case, use feature closest to median
                  if(any(duplicated(sub$Raw.file)))
                  {
                    sub <- sub[!duplicated(sub$Raw.file),]
                  }

                  median_m.z <- stats::median(sub$m.z,na.rm=T)##
                  median_RT <- stats::median(sub$Retention.time,na.rm=T)

                  RT_max_min <- max(sub$Retention.time,na.rm=T) - min(sub$Retention.time,na.rm=T)
                  too_large_RT_variation <- F

                  ###get subset of allpeptides based on charge and mz window indexing
                  start_indices <- which(allpeptides_frag_indices_per_mz_window[[c]][,1] > median_m.z + borders_m.z[1])[1] - 1
                  end_indices <- which(allpeptides_frag_indices_per_mz_window[[c]][,2] > median_m.z + borders_m.z[2])[1] + 1

                  if(is.na(end_indices) | end_indices > nrow(allpeptides_frag_indices_per_mz_window[[c]]))end_indices <- nrow(allpeptides_frag_indices_per_mz_window[[c]])

                  if(is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & !is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices+1,3]))
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices+1,3]

                    end_indices_temp <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                    if (is.na(end_indices_temp)) {
                      end_indices <- na.omit(allpeptides_frag_indices_per_mz_window[[c]][(end_indices+1):nrow(allpeptides_frag_indices_per_mz_window[[c]]),3])[1]
                    }else {
                      end_indices <- end_indices_temp
                    }
                  }else if(!is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & is.na(allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]))
                  {
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,4]
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]
                  }else if(is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & !is.na(allpeptides_frag_indices_per_mz_window[[c]][end_indices,3]))
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,3]
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                  }else
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                  }

                  temp_sub <- allpeptides_frag[[c]][start_indices:end_indices,]
                  sub <- temp_sub[which(temp_sub$m.z >= median_m.z + borders_m.z[1] & temp_sub$m.z <= median_m.z + borders_m.z[2] & temp_sub$Retention.time >= median_RT + borders_RT[1] & temp_sub$Retention.time <= median_RT + borders_RT[2] | temp_sub$Sequence == unique_peptides[i] & temp_sub$Charge == c & temp_sub$Modifications == m),]

                  sub <- sub[order(sub$Intensity,sub$Sequence,decreasing = T),]

                  ###check if selected MaxQ features in expected RT and mz window contains outliers (too high deviation in RT or mz compared to all other features)
                  box_stats_rt <- grDevices::boxplot.stats(sub$Retention.time)$stats
                  box_stats_mz <- grDevices::boxplot.stats(sub$m.z)$stats
                  outlier <- which(sub$Retention.time < box_stats_rt[2] | sub$Retention.time > box_stats_rt[4] | sub$m.z < box_stats_mz[2] | sub$m.z > box_stats_mz[4])
                  #features with sequence identification will be never regarded as outlier
                  if(any(sub$Sequence[outlier] == unique_peptides[i]))
                  {
                    outlier <- outlier[-which(sub$Sequence[outlier] == unique_peptides[i])]
                  }
                  if(length(outlier)>0) ###any outlier detected? exclude them
                  {
                    sub <- sub[-outlier,]
                  }

                  ###check if the fraction of features with not currently searched sequence is > 25 %
                  ###if yes, keep all sequences and indicate this with multiple sequences for this feature otherwise remove other sequences
                  count_sequences <- plyr::count(sub$Sequence[which(sub$Sequence != " " & sub$Sequence != "")])
                  if(count_sequences$freq[which(count_sequences$x == unique_peptides[i])] >= 0.8*sum(count_sequences$freq))
                  {
                    sub <- sub[which(sub$Sequence == unique_peptides[i] | sub$Sequence == " " | sub$Sequence == ""),]
                  }

                  if(any(duplicated(sub$Raw.file)))
                  {
                    sub <- sub[!duplicated(sub$Raw.file),]
                  }

                  if(nrow(sub) == 0) ###variation in RT for same peptide sequence between samples is too high
                  {
                    borders_RT_save <- borders_RT
                    too_large_RT_variation <- T
                    borders_RT <- c(-RT_max_min/2,RT_max_min/2)
                    sub <- temp_sub[which(temp_sub$m.z >= median_m.z + borders_m.z[1] & temp_sub$m.z <= median_m.z + borders_m.z[2] & temp_sub$Retention.time >= median_RT + borders_RT[1] & temp_sub$Retention.time <= median_RT + borders_RT[2]),]
                    if(nrow(sub) == 0)###still no match
                    {
                      too_large_RT_variation <- F
                      borders_RT <- borders_RT_save
                    }
                  }

                  if(nrow(sub) > 0)
                  {
                    count_features <- count_features + 1

                    ###new feature - give a new name
                    name <- base::paste("Feature",count_features,sep="_")

                    sequences_relevant <- which(nchar(as.character(sub$Sequence)) > 1)
                    if(length(sequences_relevant) > 0)
                    {
                      sequence <- base::paste(unique(c(sub_peptide$Sequence[1],sub[sequences_relevant,"Sequence"])),collapse=";")
                    }else
                    {
                      sequence <- sub_peptide$Sequence[1]
                    }

                    proteins_relevant <- which(nchar(as.character(sub$Proteins)) > 1)
                    if(length(proteins_relevant) > 0)
                    {
                      protein <- base::paste(unique(c(sub_peptide$Proteins[1],sub[proteins_relevant,"Proteins"])),collapse=";")
                    }else
                    {
                      protein <- sub_peptide$Proteins[1]
                    }

                    msmsscansrelevant_relevant <- which(nchar(as.character(sub$MSMS.Scan.Numbers)) > 1)
                    if(length(msmsscansrelevant_relevant) > 0)
                    {
                      msmsscan <- base::paste(unique(base::paste(sub[msmsscansrelevant_relevant,"Raw.file"],"_msms_",sub[msmsscansrelevant_relevant,"MSMS.Scan.Numbers"],sep="")),collapse=";")
                    }else
                    {
                      msmsscan <- ""
                    }

                    median_mass <- stats::median(sub$Mass,na.rm=T)
                    Charge <- c

                    scores <- NULL
                    num_matches <- NULL
                    for(s in unique(unlist(stringr::str_split(sequence,";"))))
                    {
                      scores <- append(scores,mean(sub$Score[which(sub$Sequence == s)],na.rm=T))
                      num_matches <- append(num_matches,nrow(sub[which(sub$Sequence == s),]))
                    }

                    Modifications <- ifelse(m != "Unmodified",m,"")

                    RT_length <- max(sub$Retention.Length,na.rm = T)

                    temp_RT <- stats::aggregate(sub$Retention.time,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)

                    Observed_RT <- base::paste(temp_RT[match(sample_list,temp_RT$Raw.file),2],collapse=";") ###order all observed RT according to sample_list


                    temp_mz <- stats::aggregate(sub$isotope_corrected_mz_at_max_int,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)

                    Observed_mz <- base::paste(temp_mz[match(sample_list,temp_mz$Raw.file),2],collapse=";") ###order all observed mz according to sample_list


                    temp_score <- stats::aggregate(sub$Score,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)

                    Observed_score <- base::paste(temp_score[match(sample_list,temp_score$Raw.file),2],collapse=";") ###order all observed scores according to sample_list


                    data.table::set(x = features,i = as.integer(count_features),j = as.integer(c(1,4,5,6,11,15,16,17,19,20,21)),value = as.list(c(name,
                                                                                                                                                  sequence,
                                                                                                                                                  protein,
                                                                                                                                                  msmsscan,
                                                                                                                                                  base::paste(rownames(sub),collapse = ","),
                                                                                                                                                  base::paste(scores,collapse=";"),
                                                                                                                                                  base::paste(num_matches,collapse=";"),
                                                                                                                                                  Modifications,
                                                                                                                                                  Observed_RT,
                                                                                                                                                  Observed_mz,
                                                                                                                                                  Observed_score
                    )))


                    data.table::set(x = features,i = as.integer(count_features),j = as.integer(c(2,3,7,8,9,10,12,13,14,18)),value = as.list(c(
                      as.numeric(median_m.z),
                      as.numeric(median_RT),
                      as.numeric(median_m.z+borders_m.z[1]),
                      as.numeric(median_m.z+borders_m.z[2]),
                      as.numeric(median_RT+borders_RT[1]),
                      as.numeric(median_RT+borders_RT[2]),
                      as.numeric(nrow(sub)),
                      as.numeric(median_mass),
                      as.numeric(Charge),
                      RT_length
                    )))



                    ###now mark matched features
                    data.table::set(temp_data,i = as.integer(rownames(sub)),j=1L,value=1)
                    #temp_data[as.numeric(rownames(sub)),1] <- 1

                    ###if RT_window had to be increased, reset borders_RT
                    if(too_large_RT_variation == T)
                    {
                      too_large_RT_variation <- F
                      borders_RT <- borders_RT_save
                    }
                  }
                }
              }
            }
          }
        },error=function(cond) {})


        updatecounter <- updatecounter + 1
        if(updatecounter >= 50)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }
    if(MassSpec_mode == "TIMSToF")
    {
      for(i in 1:length(unique_peptides))
      {
        sub_peptide <- allpeptides[order_by_Sequence[last_index:(last_index-1+number_of_rows_per_peptide_sequence[i,2])],]
        last_index <- last_index + number_of_rows_per_peptide_sequence[i,2]
        sub_peptide <- sub_peptide[which(sub_peptide$Sequence == unique_peptides[i]),]
        tryCatch({
          for(c in unique(sub_peptide$Charge))
          {
            for(m in unique(sub_peptide$Modifications))
            {
              sub <- sub_peptide[which(sub_peptide$Charge == c & sub_peptide$Modifications == m),]
              if(nrow(sub) > 0)
              {
                sub <- sub[order(sub$Intensity,decreasing = T),]

                ###only use features close to median feature over all samples for this peptide
                median_m.z <- stats::median(sub$m.z,na.rm=T)##
                median_RT <- stats::median(sub$Retention.time,na.rm=T)
                median_IM <- stats::median(sub$inverse_K0,na.rm=T)
                selection <- which(abs(sub$inverse_K0-median_IM) < borders_IM[2] & abs(sub$Retention.time-median_RT) < borders_RT[2] & abs(sub$m.z-median_m.z) < borders_m.z[2])
                sub <- sub[selection,]

                if(nrow(sub) > 0)
                {
                  ###check if peptide was sequenced several times. If this is the case, use feature closest to median
                  if(any(duplicated(sub$Raw.file)))
                  {
                    sub <- sub[!duplicated(sub$Raw.file),]
                  }

                  median_m.z <- stats::median(sub$m.z,na.rm=T)##
                  median_RT <- stats::median(sub$Retention.time,na.rm=T)
                  median_IM <- stats::median(sub$inverse_K0,na.rm=T)

                  RT_max_min <- max(sub$Retention.time,na.rm=T) - min(sub$Retention.time,na.rm=T)
                  too_large_RT_variation <- F

                  ###get subset of allpeptides based on charge and mz window indexing
                  start_indices <- which(allpeptides_frag_indices_per_mz_window[[c]][,1] > median_m.z + borders_m.z[1])[1] - 1
                  end_indices <- which(allpeptides_frag_indices_per_mz_window[[c]][,2] > median_m.z + borders_m.z[2])[1] + 1

                  if(is.na(end_indices) | end_indices > nrow(allpeptides_frag_indices_per_mz_window[[c]]))end_indices <- nrow(allpeptides_frag_indices_per_mz_window[[c]])

                  if(is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & !is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices+1,3]))
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices+1,3]

                    end_indices_temp <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                    if (is.na(end_indices_temp)) {
                      end_indices <- na.omit(allpeptides_frag_indices_per_mz_window[[c]][(end_indices+1):nrow(allpeptides_frag_indices_per_mz_window[[c]]),3])[1]
                    }else {
                      end_indices <- end_indices_temp
                    }
                  }else if(!is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & is.na(allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]))
                  {
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,4]
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]
                  }else if(is.na(allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]) & !is.na(allpeptides_frag_indices_per_mz_window[[c]][end_indices,3]))
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,3]
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                  }else
                  {
                    start_indices <- allpeptides_frag_indices_per_mz_window[[c]][start_indices,3]
                    end_indices <- allpeptides_frag_indices_per_mz_window[[c]][end_indices,4]
                  }

                  temp_sub <- allpeptides_frag[[c]][start_indices:end_indices,]
                  sub <- temp_sub[which(temp_sub$m.z >= median_m.z + borders_m.z[1] & temp_sub$m.z <= median_m.z + borders_m.z[2] &
                                          temp_sub$Retention.time >= median_RT + borders_RT[1] & temp_sub$Retention.time <= median_RT + borders_RT[2] &
                                          temp_sub$inverse_K0 >= median_IM + borders_IM[1] & temp_sub$inverse_K0 <= median_IM + borders_IM[2] |
                                          temp_sub$Sequence == unique_peptides[i] & temp_sub$Charge == c & temp_sub$Modifications == m),]

                  sub <- sub[order(sub$Intensity,sub$Sequence,decreasing = T),]

                  ###check if selected MaxQ features in expected RT, IM and mz window contains outliers (too high deviation in RT, IM or mz compared to all other features)
                  box_stats_rt <- grDevices::boxplot.stats(sub$Retention.time)$stats
                  box_stats_mz <- grDevices::boxplot.stats(sub$m.z)$stats
                  box_stats_im <- grDevices::boxplot.stats(sub$inverse_K0)$stats
                  outlier <- which(sub$Retention.time < box_stats_rt[2] | sub$Retention.time > box_stats_rt[4] |
                                     sub$m.z < box_stats_mz[2] | sub$m.z > box_stats_mz[4] |
                                     sub$inverse_K0 < box_stats_im[2] | sub$inverse_K0 > box_stats_im[4])
                  #features with sequence identification will be never regarded as outlier
                  if(any(sub$Sequence[outlier] == unique_peptides[i]))
                  {
                    outlier <- outlier[-which(sub$Sequence[outlier] == unique_peptides[i])]
                  }
                  if(length(outlier)>0) ###any outlier detected? exclude them
                  {
                    sub <- sub[-outlier,]
                  }

                  ###check if the fraction of features with not currently searched sequence is > 25 %
                  ###if yes, keep all sequences and indicate this with multiple sequences for this feature otherwise remove other sequences
                  count_sequences <- plyr::count(sub$Sequence[which(sub$Sequence != " " & sub$Sequence != "")])
                  if(count_sequences$freq[which(count_sequences$x == unique_peptides[i])] >= 0.8*sum(count_sequences$freq))
                  {
                    sub <- sub[which(sub$Sequence == unique_peptides[i] | sub$Sequence == " " | sub$Sequence == ""),]
                  }

                  if(any(duplicated(sub$Raw.file)))
                  {
                    sub <- sub[!duplicated(sub$Raw.file),]
                  }

                  if(nrow(sub) == 0) ###variation in RT for same peptide sequence between samples is too high
                  {
                    borders_RT_save <- borders_RT
                    too_large_RT_variation <- T
                    borders_RT <- c(-RT_max_min/2,RT_max_min/2)
                    sub <- temp_sub[which(temp_sub$m.z >= median_m.z + borders_m.z[1] & temp_sub$m.z <= median_m.z + borders_m.z[2] &
                                            temp_sub$Retention.time >= median_RT + borders_RT[1] & temp_sub$Retention.time <= median_RT + borders_RT[2] &
                                            temp_sub$inverse_K0 >= median_IM + borders_IM[1] & temp_sub$inverse_K0 <= median_IM + borders_IM[2]),]
                    if(nrow(sub) == 0)###still no match
                    {
                      too_large_RT_variation <- F
                      borders_RT <- borders_RT_save
                    }
                  }

                  if(nrow(sub) > 0)
                  {
                    count_features <- count_features + 1

                    ###new feature - give a new name
                    name <- base::paste("Feature",count_features,sep="_")

                    sequences_relevant <- which(nchar(as.character(sub$Sequence)) > 1)
                    if(length(sequences_relevant) > 0)
                    {
                      sequence <- base::paste(unique(c(sub_peptide$Sequence[1],sub[sequences_relevant,"Sequence"])),collapse=";")
                    }else
                    {
                      sequence <- sub_peptide$Sequence[1]
                    }

                    proteins_relevant <- which(nchar(as.character(sub$Proteins)) > 1)
                    if(length(proteins_relevant) > 0)
                    {
                      protein <- base::paste(unique(c(sub_peptide$Proteins[1],sub[proteins_relevant,"Proteins"])),collapse=";")
                    }else
                    {
                      protein <- sub_peptide$Proteins[1]
                    }

                    msmsscansrelevant_relevant <- which(nchar(as.character(sub$MSMS.Scan.Numbers)) > 1)
                    if(length(msmsscansrelevant_relevant) > 0)
                    {
                      msmsscan <- base::paste(unique(base::paste(sub[msmsscansrelevant_relevant,"Raw.file"],"_msms_",sub[msmsscansrelevant_relevant,"MSMS.Scan.Numbers"],sep="")),collapse=";")
                    }else
                    {
                      msmsscan <- ""
                    }

                    median_mass <- stats::median(sub$Mass,na.rm=T)
                    Charge <- c

                    scores <- NULL
                    num_matches <- NULL
                    for(s in unique(unlist(stringr::str_split(sequence,";"))))
                    {
                      scores <- append(scores,mean(sub$Score[which(sub$Sequence == s)],na.rm=T))
                      num_matches <- append(num_matches,nrow(sub[which(sub$Sequence == s),]))
                    }

                    Modifications <- ifelse(m != "Unmodified",m,"")

                    RT_length <- max(sub$Retention.Length,na.rm = T)
                    temp_RT <- stats::aggregate(sub$Retention.time,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)
                    Observed_RT <- base::paste(temp_RT[match(sample_list,temp_RT$Raw.file),2],collapse=";") ###order all observed RT according to sample_list

                    temp_mz <- stats::aggregate(sub$m.z,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)
                    Observed_mz <- base::paste(temp_mz[match(sample_list,temp_mz$Raw.file),2],collapse=";") ###order all observed mz according to sample_list

                    temp_score <- stats::aggregate(sub$Score,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)
                    Observed_score <- base::paste(temp_score[match(sample_list,temp_score$Raw.file),2],collapse=";") ###order all observed scores according to sample_list

                    IM_length <- max(sub$inverse_K0_length,na.rm = T)
                    temp_IM <- stats::aggregate(sub$inverse_K0,by=list(Raw.file=sub$Raw.file),FUN=mean,na.rm=T)
                    Observed_IM <- base::paste(temp_IM[match(sample_list,temp_IM$Raw.file),2],collapse=";") ###order all observed IM according to sample_list



                    data.table::set(x = features,i = as.integer(count_features),j = as.integer(c(1,4,5,6,11,15,16,17,19,20,21,26)),value = as.list(c(name,
                                                                                                                                                     sequence,
                                                                                                                                                     protein,
                                                                                                                                                     msmsscan,
                                                                                                                                                     base::paste(rownames(sub),collapse = ","),
                                                                                                                                                     base::paste(scores,collapse=";"),
                                                                                                                                                     base::paste(num_matches,collapse=";"),
                                                                                                                                                     Modifications,
                                                                                                                                                     Observed_RT,
                                                                                                                                                     Observed_mz,
                                                                                                                                                     Observed_score,
                                                                                                                                                     Observed_IM
                    )))
                    data.table::set(x = features,i = as.integer(count_features),j = as.integer(c(2,3,7,8,9,10,12,13,14,18,22,23,24,25)),value = as.list(c(
                      as.numeric(median_m.z),
                      as.numeric(median_RT),
                      as.numeric(median_m.z+borders_m.z[1]),
                      as.numeric(median_m.z+borders_m.z[2]),
                      as.numeric(median_RT+borders_RT[1]),
                      as.numeric(median_RT+borders_RT[2]),
                      as.numeric(nrow(sub)),
                      as.numeric(median_mass),
                      as.numeric(Charge),
                      RT_length,
                      median_IM,
                      IM_length,
                      as.numeric(median_IM+borders_IM[1]),
                      as.numeric(median_IM+borders_IM[2])
                    )))

                    ###now mark matched features
                    data.table::set(temp_data,i = as.integer(rownames(sub)),j=1L,value=1)
                    #temp_data[as.numeric(rownames(sub)),1] <- 1

                    ###if RT_window had to be increased, reset borders_RT
                    if(too_large_RT_variation == T)
                    {
                      too_large_RT_variation <- F
                      borders_RT <- borders_RT_save
                    }
                  }
                }
              }
            }
          }
        },error=function(cond) {})

        updatecounter <- updatecounter + 1
        if(updatecounter >= 50)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }

    close(pb)

    features <- features[1:count_features,]
    #write.table(features,base::paste("Temporary_files/Features_aligned_peptides",output_file_names_add,".txt",sep=""),row.names = F)

    borders_RT <- borders_RT_use_save

    if(MassSpec_mode == "Orbitrap")save(temp_data,features,allpeptides,borders_m.z,borders_RT,windows,file=base::paste("Temporary_files/features_aligned_step_1",output_file_names_add,".RData",sep=""))
    if(MassSpec_mode == "TIMSToF")save(temp_data,features,allpeptides,borders_m.z,borders_RT,borders_IM,windows,file=base::paste("Temporary_files/features_aligned_step_1",output_file_names_add,".RData",sep=""))
    #####Perform alignment of undefined features

    # if(align_unknown == T)
    # {
    #   ###2.) step - align remaining features
    #
    #   #load(base::paste("features_aligned_step_1",output_file_names_add,".RData",sep=""))
    #
    #   count_features <- nrow(features)
    #
    #   ###start by trying to find unique features over all samples defined by the pair Mass and RT
    #   border_factor <- 4
    #
    #   ####use this matrix for faster subsetting
    #   temp_data_m <- as.matrix(subset(allpeptides,select=c("m.z","Retention.time","Charge")))
    #   temp_data_m <- cbind(temp_data_m,temp_data)
    #   colnames(temp_data_m)[4] <- "Matched"
    #   temp_data_m[which(is.na(temp_data_m[,4])),4] <- 0
    #   #temp_data_m <- cbind(temp_data_m,as.matrix(subset(allpeptides,select=c("Retention.Length"))))
    #
    #   features_unknown <- base::as.data.frame(matrix(ncol=21,nrow=(nrow(temp_data)/min_num_ions_collapse)))
    #   colnames(features_unknown) <- c("Feature_name","m.z","RT","Sequence","Protein","MSMS.Scan.Numbers","m.z_range_min","m.z_range_max","RT_range_min","RT_range_max","ion_indices","count_ion_indices","Mass","Charge","mean_Scores","num_matches","Modifications","RT_length","Observed_RT","Observed_mz","Observed_score")
    #
    #
    #   features_unknown <- data.table::as.data.table(features_unknown)
    #   features_unknown$Feature_name <- as.character(features_unknown$Feature_name)
    #   features_unknown$Mass <- as.numeric(features_unknown$Mass)
    #   features_unknown$RT <- as.numeric(features_unknown$RT)
    #   features_unknown$Sequence <- as.character(features_unknown$Sequence)
    #   features_unknown$Protein <- as.character(features_unknown$Protein)
    #   features_unknown$MSMS.Scan.Numbers <- as.character(features_unknown$MSMS.Scan.Numbers)
    #   features_unknown$m.z_range_min <- as.numeric(features_unknown$m.z_range_min)
    #   features_unknown$m.z_range_max <- as.numeric(features_unknown$m.z_range_max)
    #   features_unknown$RT_range_min <- as.numeric(features_unknown$RT_range_min)
    #   features_unknown$RT_range_max <- as.numeric(features_unknown$RT_range_max)
    #   features_unknown$ion_indices <- as.character(features_unknown$ion_indices)
    #   features_unknown$count_ion_indices <- as.numeric(features_unknown$count_ion_indices)
    #   features_unknown$m.z <- as.numeric(features_unknown$m.z)
    #   features_unknown$Charge <- as.numeric(features_unknown$Charge)
    #   features_unknown$mean_Scores <- as.character(features_unknown$mean_Scores)
    #   features_unknown$num_matches <- as.character(features_unknown$num_matches)
    #   features_unknown$Modifications <- as.character(features_unknown$Modifications)
    #   features_unknown$RT_length <- as.numeric(features_unknown$RT_length)
    #   features_unknown$Observed_RT <- as.character(features_unknown$Observed_RT)
    #   features_unknown$Observed_mz <- as.character(features_unknown$Observed_mz)
    #   features_unknown$Observed_score <- as.character(features_unknown$Observed_score)
    #
    #   max <- nrow(temp_data_m)
    #   pb <- tcltk::tkProgressBar(title = "Aligning unknown features",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    #   start_time <- Sys.time()
    #   updatecounter <- 0
    #   time_require <- 0
    #   start <- 1
    #   end <- max-1
    #
    #   count_features_unknown <- 0
    #
    #   for(i in start:end) ###go through all features and try to match with others in the data set
    #   {
    #     if(temp_data_m[i,4] == 0)###check if already matched
    #     {
    #       ind <- i
    #       while(T)
    #       {
    #         ind = ind + 1000
    #         if(ind > nrow(temp_data_m))
    #         {
    #           ind <- nrow(temp_data_m)
    #           break
    #         }
    #         if(temp_data_m[ind,1] > (temp_data_m[i,1]+(border_factor*borders_m.z[2])))
    #         {
    #           break
    #         }
    #       }
    #
    #       sub <- temp_data_m[i:ind,]
    #       sub <- sub[which(sub[,4] == 0),]
    #
    #       ###determine which other features might fit to the current unmatched feature
    #       ###here use a wide mass and RT range to get a first preselection for Mass and RT distribution estimation                                  )
    #
    #       matches_preselection <- which(sub[,1] >= (temp_data_m[i,1]+(border_factor*borders_m.z[1])) & sub[,1] <= (temp_data_m[i,1]+(border_factor*borders_m.z[2])) & sub[,2] >= (temp_data_m[i,2]+(border_factor*borders_RT[1])) & sub[,2] <= (temp_data_m[i,2]+(border_factor*borders_RT[2])) & as.numeric(sub[,3]) == temp_data_m[i,3])
    #
    #       #Sys.time()-start
    #       if(length(matches_preselection) >= min_num_ions_collapse) ###at least n other features
    #       {
    #         ###now get estimate of true Mass and RT by using the median
    #         median_m.z <- stats::median(sub[matches_preselection,1],na.rm=T)
    #         median_RT <- stats::median(sub[matches_preselection,2],na.rm=T)
    #
    #         ###determine which features will be finally matched with the normal m/z and RT range
    #         matches_temp <- which(as.numeric(sub[,1]) >= (median_m.z+borders_m.z[1]) & as.numeric(sub[,1]) <= (median_m.z+borders_m.z[2]) & as.numeric(sub[,2]) >= (median_RT+borders_RT[1]) & as.numeric(sub[,2]) <= (median_RT+borders_RT[2]) & as.numeric(sub[,3]) == temp_data_m[i,3])
    #
    #         matches <- as.numeric(rownames(sub)[matches_temp]) ###convert back to original data
    #
    #         if(length(matches) >= min_num_ions_collapse)
    #         {
    #           count_features <- count_features + 1
    #           count_features_unknown <- count_features_unknown + 1
    #           ###new feature - give a new name
    #           name <- base::paste("Feature",count_features,sep="_")
    #
    #           sequences_relevant <- which(nchar(as.character(allpeptides[matches,"Sequence"])) > 1)
    #           if(length(sequences_relevant) > 0)
    #           {
    #             sequence <- base::paste(unique(allpeptides[matches[sequences_relevant],"Sequence"]),collapse=";")
    #           }else
    #           {
    #             sequence <- ""
    #           }
    #
    #           proteins_relevant <- which(nchar(as.character(allpeptides[matches,"Proteins"])) > 1)
    #           if(length(proteins_relevant) > 0)
    #           {
    #             protein <- base::paste(unique(allpeptides[matches[proteins_relevant],"Proteins"]),collapse=";")
    #           }else
    #           {
    #             protein <- ""
    #           }
    #
    #           msmsscansrelevant_relevant <- which(nchar(as.character(allpeptides[matches,"MSMS.Scan.Numbers"])) > 1)
    #           if(length(msmsscansrelevant_relevant) > 0)
    #           {
    #             msmsscan <- base::paste(unique(base::paste(allpeptides[matches[msmsscansrelevant_relevant],"Raw.file"],"_msms_",allpeptides[matches[msmsscansrelevant_relevant],"MSMS.Scan.Numbers"],sep="")),collapse=";")
    #           }else
    #           {
    #             msmsscan <- ""
    #           }
    #
    #           median_mass <- stats::median(allpeptides$Mass[matches],na.rm=T)
    #           Charge <- temp_data_m[i,3]
    #
    #
    #           if(sequence != "")
    #           {
    #             scores <- NULL
    #             num_matches <- NULL
    #             for(s in unique(unlist(stringr::str_split(sequence,";"))))
    #             {
    #               scores <- append(scores,mean(allpeptides[matches,"Score"][which(allpeptides[matches,"Sequence"] == s)],na.rm=T))
    #               num_matches <- append(num_matches,length(which(allpeptides[matches,"Sequence"] == s)))
    #             }
    #
    #             Modifications_relevant <- which(nchar(as.character(allpeptides[matches,"Modifications"])) > 1)
    #             if(length(Modifications_relevant) > 0)
    #             {
    #               Modifications <- base::paste(unique(allpeptides[matches[Modifications_relevant],"Modifications"]),collapse=";")
    #               Modifications <- base::gsub("Unmodified","",Modifications)
    #
    #             }else
    #             {
    #               Modifications <- ""
    #             }
    #           }else
    #           {
    #             scores <- NA
    #             num_matches <- NA
    #             Modifications <- ""
    #           }
    #
    #           RT_length <- stats::median(allpeptides$Retention.Length[matches],na.rm=T)
    #
    #           temp_RT <- stats::aggregate(allpeptides$Retention.time[matches],by=list(Raw.file=allpeptides$Raw.file[matches]),FUN=mean,na.rm=T)
    #           Observed_RT <- base::paste(temp_RT[match(sample_list,temp_RT$Raw.file),2],collapse=";") ###order all observed RT according to sample_list
    #
    #           temp_mz <- stats::aggregate(allpeptides$isotope_corrected_mz_at_max_int[matches],by=list(Raw.file=allpeptides$Raw.file[matches]),FUN=mean,na.rm=T)
    #           Observed_mz <- base::paste(temp_mz[match(sample_list,temp_mz$Raw.file),2],collapse=";") ###order all observed mz according to sample_list
    #
    #           temp_score <- stats::aggregate(allpeptides$Score[matches],by=list(Raw.file=allpeptides$Raw.file[matches]),FUN=mean,na.rm=T)
    #           Observed_score <- base::paste(temp_score[match(sample_list,temp_score$Raw.file),2],collapse=";") ###order all observed scores according to sample_list
    #
    #
    #           data.table::set(x = features_unknown,i = as.integer(count_features_unknown),j = as.integer(c(1,4,5,6,11,15,16,17,19,20,21)),value = as.list(c(name,
    #                                                                                                                                             sequence,
    #                                                                                                                                             protein,
    #                                                                                                                                             msmsscan,
    #                                                                                                                                             base::paste(matches,collapse = ","),
    #                                                                                                                                             base::paste(scores,collapse=";"),
    #                                                                                                                                             base::paste(num_matches,collapse=";"),
    #                                                                                                                                             Modifications,
    #                                                                                                                                             Observed_RT,
    #                                                                                                                                             Observed_mz,
    #                                                                                                                                             Observed_score
    #           )))
    #           data.table::set(x = features_unknown,i = as.integer(count_features_unknown),j = as.integer(c(2,3,7,8,9,10,12,13,14,18)),value = as.list(c(
    #             as.numeric(median_m.z),
    #             as.numeric(median_RT),
    #             as.numeric(median_m.z+borders_m.z[1]),
    #             as.numeric(median_m.z+borders_m.z[2]),
    #             as.numeric(median_RT+borders_RT[1]),
    #             as.numeric(median_RT+borders_RT[2]),
    #             as.numeric(length(matches)),
    #             as.numeric(median_mass),
    #             as.numeric(Charge),
    #             RT_length
    #           )))
    #
    #
    #           ###now mark matched features
    #           # starttime <- Sys.time()
    #           # temp_data_m[matches,4] <- 1
    #           # print(Sys.time()-starttime)
    #
    #           data.table::set(temp_data_m,i=as.integer(matches),j=4L,value=1)
    #
    #         }
    #
    #
    #       }
    #     }
    #
    #     updatecounter <- updatecounter + 1
    #     if(updatecounter >= 5000)
    #     {
    #       time_elapsed <- difftime(Sys.time(),start_time,units="secs")
    #       time_require <- (time_elapsed/(i/max))*(1-(i/max))
    #       td <- lubridate::seconds_to_period(time_require)
    #       time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))
    #
    #       updatecounter <- 0
    #       tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
    #     }
    #
    #   }
    #   close(pb)
    #
    #   features_unknown <- features_unknown[1:count_features_unknown,]
    #
    #   #write.table(features_unknown,base::paste("Temporary_files/Features_aligned_unknown",output_file_names_add,".txt",sep=""),row.names = F)
    #
    #   ####Combine known and unknown features
    #   features <- rbind(features,features_unknown)
    #   rownames(features) <- c()
    #
    #   ###number of matched to number of unmatched ions
    #   value <- length(which(temp_data_m$Matched == 1))/nrow(temp_data_m) * 100
    #   barplot(value,main="Relative fraction of features which are matched",ylab="Fraction [%]")
    #
    #   QC_data[["Numbers_matched_features"]] <- value
    #
    #   ###Store all results
    #
    #   save(temp_data_m,features,count_features_unknown,file=base::paste("features_aligned_step_2",output_file_names_add,".RData",sep=""))
    # }

    #write.table(features,base::paste("Temporary_files/Features_aligned",output_file_names_add,".txt",sep=""),row.names = F)

    ####Merge features which were currently regarded as separated although same RT and m/z
    #features <- utils::read.table("Features_alligned.txt",header = T)

    #####prepare columns to indicate number of charges and with which other features the respective feature was merged
    features$num_diff_charges <- NA
    features$num_diff_charges <- as.numeric(features$num_diff_charges)
    features$merged_with <- ""
    ncolumns <- ncol(features)

    ###collapse features with same m/z and RT parameters and count number of different charge states for same feature
    borders_mass <- c(-feature_mass_deviation_collapse,feature_mass_deviation_collapse)

    max <- nrow(features)
    pb <- tcltk::tkProgressBar(title = "Merge features",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    start_time <- Sys.time()
    updatecounter <- 0
    time_require <- 0
    if(MassSpec_mode == "Orbitrap")
    {
      for(i in 1:nrow(features))
      {
        if(is.na(features[i,"num_diff_charges"]))
        {
          selection <- which(features$Mass >= features$Mass[i]+borders_mass[1] & features$Mass <= features$Mass[i]+borders_mass[2] &
                               features$RT >= features$RT[i]+borders_RT[1] & features$RT <= features$RT[i]+borders_RT[2])
          if(length(selection) > 1)
          {
            ###merge same charges
            for(c in unique(features[selection,]$Charge))
            {
              if(length(which(features[selection,]$Charge == c)) > 1) ###more than one feature with same charge -> merge
              {
                m.z_range_min <- min(features[selection[which(features[selection,]$Charge == c)],]$m.z_range_min,na.rm=T)
                m.z_range_max <- max(features[selection[which(features[selection,]$Charge == c)],]$m.z_range_max,na.rm=T)
                RT_range_min <- min(features[selection[which(features[selection,]$Charge == c)],]$RT_range_min,na.rm=T)
                RT_range_max <- max(features[selection[which(features[selection,]$Charge == c)],]$RT_range_max,na.rm=T)
                ion_indices <- base::paste(unlist(features[selection[which(features[selection,]$Charge == c)],]$ion_indices),collapse=",")
                count_ion_indices <- sum(features[selection[which(features[selection,]$Charge == c)],]$count_ion_indices,na.rm = T)
                mass <- mean(unlist(features[selection[which(features[selection,]$Charge == c)],]$Mass),na.rm=T)
                m.z <- mean(c(m.z_range_min,m.z_range_max))
                RT <- mean(c(RT_range_min,RT_range_max))

                Sequence <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Sequence,";")))

                scores <- NULL
                num_matches <- NULL
                Modifications <- NULL
                for(s in unique(Sequence))
                {
                  scores_temp <- as.numeric(unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$mean_Scores,";")))
                  scores <- append(scores,mean(scores_temp,na.rm=T))

                  num_matches_temp <- as.numeric(unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$num_matches,";")))
                  num_matches <- append(num_matches,sum(num_matches_temp,na.rm=T))

                  Modifications_temp <- unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$Modifications,";"))
                  Modifications <- append(Modifications,base::paste(Modifications_temp,collapse=","))
                }
                scores <- base::paste(scores,collapse=",")
                num_matches <- base::paste(num_matches,collapse=",")
                Modifications <- base::paste(Modifications,collapse=",")

                if(any(Sequence == ""))Sequence <- Sequence[-which(Sequence == "")]
                Sequence <- base::paste(Sequence,collapse=";")

                Protein <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Protein,";")))
                if(any(Protein == ""))Protein <- Protein[-which(Protein == "")]
                Protein <- base::paste(Protein,collapse=";")

                MSMS.Scan.Numbers <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$MSMS.Scan.Numbers,";")))
                if(any(MSMS.Scan.Numbers == ""))MSMS.Scan.Numbers <- MSMS.Scan.Numbers[-which(MSMS.Scan.Numbers == "")]
                MSMS.Scan.Numbers <- base::paste(MSMS.Scan.Numbers,collapse=";")

                Observed_RTs <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_RT,pattern = ";",simplify = T)
                Observed_RTs <- apply(Observed_RTs, 2, as.numeric)
                Observed_RTs <- colMeans(Observed_RTs,na.rm=T)
                Observed_RTs[is.na(Observed_RTs)] <- NA
                Observed_RTs <- base::paste(Observed_RTs,collapse=";")

                Observed_mz <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_mz,pattern = ";",simplify = T)
                Observed_mz <- apply(Observed_mz, 2, as.numeric)
                Observed_mz <- colMeans(Observed_mz,na.rm=T)
                Observed_mz[is.na(Observed_mz)] <- NA
                Observed_mz <- base::paste(Observed_mz,collapse=";")

                Observed_score <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_score,pattern = ";",simplify = T)
                Observed_score <- apply(Observed_score, 2, as.numeric)
                Observed_score <- colMeans(Observed_score,na.rm=T)
                Observed_score[is.na(Observed_score)] <- NA
                Observed_score <- base::paste(Observed_score,collapse=";")

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[1]],j=as.integer(c(2,3,7,8,9,10,12,13,14)),value=as.list(c(
                  m.z,
                  RT,
                  m.z_range_min,
                  m.z_range_max,
                  RT_range_min,
                  RT_range_max,
                  count_ion_indices,
                  mass,
                  c)))

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[1]],j=as.integer(c(4,5,6,11,15,16,17,19,20,21)),value=as.list(c(
                  Sequence,
                  Protein,
                  MSMS.Scan.Numbers,
                  ion_indices,
                  scores,
                  num_matches,
                  Modifications,
                  Observed_RTs,
                  Observed_mz,
                  Observed_score)))

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[-1]],j=as.integer(ncolumns),value=features$Feature_name[selection[which(features[selection,]$Charge == c)[1]]])
              }
            }
            ###add number of different charges count
            data.table::set(x=features,i=selection,j=as.integer(ncolumns-1),value=length(unique(features[selection,]$Charge)))
          }
        }
        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }
    if(MassSpec_mode == "TIMSToF")
    {
      for(i in 1:nrow(features))
      {
        if(is.na(features[i,"num_diff_charges"]))
        {
          selection <- which(features$Mass >= features$Mass[i]+borders_mass[1] & features$Mass <= features$Mass[i]+borders_mass[2] &
                               features$RT >= features$RT[i]+borders_RT[1] & features$RT <= features$RT[i]+borders_RT[2] &
                               features$Inv_K0 >= features$Inv_K0[i]+borders_IM[1] & features$Inv_K0 <= features$Inv_K0[i]+borders_IM[2])
          if(length(selection) > 1)
          {
            ###merge same charges
            for(c in unique(features[selection,]$Charge))
            {
              if(length(which(features[selection,]$Charge == c)) > 1) ###more than one feature with same charge -> merge
              {
                m.z_range_min <- min(features[selection[which(features[selection,]$Charge == c)],]$m.z_range_min,na.rm=T)
                m.z_range_max <- max(features[selection[which(features[selection,]$Charge == c)],]$m.z_range_max,na.rm=T)
                RT_range_min <- min(features[selection[which(features[selection,]$Charge == c)],]$RT_range_min,na.rm=T)
                RT_range_max <- max(features[selection[which(features[selection,]$Charge == c)],]$RT_range_max,na.rm=T)
                IM_range_min <- min(features[selection[which(features[selection,]$Charge == c)],]$Inv_K0_range_min,na.rm=T)
                IM_range_max <- max(features[selection[which(features[selection,]$Charge == c)],]$Inv_K0_range_max,na.rm=T)
                ion_indices <- base::paste(unlist(features[selection[which(features[selection,]$Charge == c)],]$ion_indices),collapse=",")
                count_ion_indices <- sum(features[selection[which(features[selection,]$Charge == c)],]$count_ion_indices,na.rm = T)
                mass <- mean(unlist(features[selection[which(features[selection,]$Charge == c)],]$Mass),na.rm=T)
                m.z <- mean(c(m.z_range_min,m.z_range_max))
                RT <- mean(c(RT_range_min,RT_range_max))
                IM <- mean(c(IM_range_min,IM_range_max))

                Sequence <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Sequence,";")))

                scores <- NULL
                num_matches <- NULL
                Modifications <- NULL
                for(s in unique(Sequence))
                {
                  scores_temp <- as.numeric(unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$mean_Scores,";")))
                  scores <- append(scores,mean(scores_temp,na.rm=T))

                  num_matches_temp <- as.numeric(unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$num_matches,";")))
                  num_matches <- append(num_matches,sum(num_matches_temp,na.rm=T))

                  Modifications_temp <- unlist(stringr::str_split(features[selection[which(features[selection,"Charge"] == c & grepl(s,features[selection,"Sequence"]))],]$Modifications,";"))
                  Modifications <- append(Modifications,base::paste(Modifications_temp,collapse=","))
                }
                scores <- base::paste(scores,collapse=",")
                num_matches <- base::paste(num_matches,collapse=",")
                Modifications <- base::paste(Modifications,collapse=",")

                if(any(Sequence == ""))Sequence <- Sequence[-which(Sequence == "")]
                Sequence <- base::paste(Sequence,collapse=";")

                Protein <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Protein,";")))
                if(any(Protein == ""))Protein <- Protein[-which(Protein == "")]
                Protein <- base::paste(Protein,collapse=";")

                MSMS.Scan.Numbers <- unique(unlist(stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$MSMS.Scan.Numbers,";")))
                if(any(MSMS.Scan.Numbers == ""))MSMS.Scan.Numbers <- MSMS.Scan.Numbers[-which(MSMS.Scan.Numbers == "")]
                MSMS.Scan.Numbers <- base::paste(MSMS.Scan.Numbers,collapse=";")

                Observed_RTs <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_RT,pattern = ";",simplify = T)
                Observed_RTs <- apply(Observed_RTs, 2, as.numeric)
                Observed_RTs <- colMeans(Observed_RTs,na.rm=T)
                Observed_RTs[is.na(Observed_RTs)] <- NA
                Observed_RTs <- base::paste(Observed_RTs,collapse=";")

                Observed_mz <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_mz,pattern = ";",simplify = T)
                Observed_mz <- apply(Observed_mz, 2, as.numeric)
                Observed_mz <- colMeans(Observed_mz,na.rm=T)
                Observed_mz[is.na(Observed_mz)] <- NA
                Observed_mz <- base::paste(Observed_mz,collapse=";")

                Observed_score <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_score,pattern = ";",simplify = T)
                Observed_score <- apply(Observed_score, 2, as.numeric)
                Observed_score <- colMeans(Observed_score,na.rm=T)
                Observed_score[is.na(Observed_score)] <- NA
                Observed_score <- base::paste(Observed_score,collapse=";")

                Observed_IMs <- stringr::str_split(features[selection[which(features[selection,]$Charge == c)],]$Observed_IM,pattern = ";",simplify = T)
                Observed_IMs <- apply(Observed_IMs, 2, as.numeric)
                Observed_IMs <- colMeans(Observed_IMs,na.rm=T)
                Observed_IMs[is.na(Observed_IMs)] <- NA
                Observed_IMs <- base::paste(Observed_IMs,collapse=";")

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[1]],j=as.integer(c(2,3,7,8,9,10,12,13,14,22,24,25)),value=as.list(c(
                  m.z,
                  RT,
                  m.z_range_min,
                  m.z_range_max,
                  RT_range_min,
                  RT_range_max,
                  count_ion_indices,
                  mass,
                  c,
                  IM,
                  IM_range_min,
                  IM_range_max)))

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[1]],j=as.integer(c(4,5,6,11,15,16,17,19,20,21,26)),value=as.list(c(
                  Sequence,
                  Protein,
                  MSMS.Scan.Numbers,
                  ion_indices,
                  scores,
                  num_matches,
                  Modifications,
                  Observed_RTs,
                  Observed_mz,
                  Observed_score,
                  Observed_IMs)))

                data.table::set(x=features,i=selection[which(features[selection,]$Charge == c)[-1]],j=as.integer(ncolumns),value=features$Feature_name[selection[which(features[selection,]$Charge == c)[1]]])
              }
            }
            ###add number of different charges count
            data.table::set(x=features,i=selection,j=as.integer(ncolumns-1),value=length(unique(features[selection,]$Charge)))
          }
        }
        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
    }

    close(pb)

    features$num_diff_charges[is.na(features$num_diff_charges)] <- 1

    features <- features[which(features$merged_with == ""),]
    features <- base::as.data.frame(features)[,-ncolumns]


    ####now add mz and RT calibration information per feature

    RT_calibration_vals <- base::as.data.frame(matrix(ncol=length(sample_list),nrow=nrow(features),NA))
    colnames(RT_calibration_vals) <- base::paste("RT_calibration",sample_list,sep=" ")
    for(c in 1:ncol(RT_calibration_vals))
    {
      RT_calibration_vals[,c] <- as.numeric(RT_calibration_vals[,c])
    }

    mz_calibration_vals <- base::as.data.frame(matrix(ncol=length(sample_list),nrow=nrow(features),NA))
    colnames(mz_calibration_vals) <- base::paste("mz_calibration",sample_list,sep=" ")
    for(c in 1:ncol(mz_calibration_vals))
    {
      mz_calibration_vals[,c] <- as.numeric(mz_calibration_vals[,c])
    }

    if(MassSpec_mode == "TIMSToF")
    {
      IM_calibration_vals <- base::as.data.frame(matrix(ncol=length(sample_list),nrow=nrow(features),NA))
      colnames(IM_calibration_vals) <- base::paste("IM_calibration",sample_list,sep=" ")
      for(c in 1:ncol(IM_calibration_vals))
      {
        IM_calibration_vals[,c] <- as.numeric(IM_calibration_vals[,c])
      }
    }

    if(any(c(RT_calibration,mz_calibration) == T)) ###any of both calibrations should be done?
    {
      ###first: start by taking already available calibration information

      peptide_features <- which(features$Sequence != "")

      pep_seq <- stringr::str_split(features$Sequence,";",simplify = T)

      evidence <- evidence[which(evidence$Raw.file %in% sample_list),]

      ###evidence_peptides
      evidence_peptides <- unique(evidence$Sequence)
      evidence_peptides_indices <- vector(mode = "list", length = length(evidence_peptides))
      names(evidence_peptides_indices) <- evidence_peptides

      max <- length(evidence_peptides)
      pb <- tcltk::tkProgressBar(title = "Indexing peptides",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      start_time <- Sys.time()
      updatecounter <- 0
      time_require <- 0
      start <- 1
      for(p in evidence_peptides)
      {
        i <- which(evidence_peptides == p)
        evidence_peptides_indices[[p]] <- which(evidence$Sequence == p)

        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
      close(pb)

      ###grap already available information for RT and mz calibration from MaxQ output
      max <- length(peptide_features)
      pb <- tcltk::tkProgressBar(title = "Preparing calibrations per feature",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      start_time <- Sys.time()
      updatecounter <- 0
      time_require <- 0
      counter <- 0
      for(i in peptide_features)
      {
        cur_mods <- stringr::str_split(features$Modifications[i],";|,",simplify = T)
        cur_mods[cur_mods == ""] <- "Unmodified"
        cur_mods <- unique(cur_mods)

        indices <- NULL
        for(p in pep_seq[i,which(pep_seq[i,] != "")])
        {
          indices <- append(indices,evidence_peptides_indices[[p]])
        }
        evidence_sub <- evidence[indices,]
        indices <- which(evidence_sub$Charge == features$Charge[i] & evidence_sub$Modifications %in% cur_mods)

        if(length(indices) > 0)
        {
          if(RT_calibration == T)
          {
            calc_RT <- as.numeric(stringr::str_split(features$Observed_RT[i],pattern = ";",simplify = T)) - features$RT[i]
            data.table::set(RT_calibration_vals,as.integer(i),as.integer(1:length(sample_list)),value = as.list(c(calc_RT)))
          }
          if(IM_calibration == T)
          {
            calc_IM <- as.numeric(stringr::str_split(features$Observed_IM[i],pattern = ";",simplify = T)) - features$Inv_K0[i]
            data.table::set(IM_calibration_vals,as.integer(i),as.integer(1:length(sample_list)),value = as.list(c(calc_IM)))
          }
          if(mz_calibration == T)
          {
            if(use_mz_at_max_int_for_correction == T) ###determine mz correction based on observed mz peaks
            {
              calc_mz <- as.numeric(stringr::str_split(features$Observed_mz[i],pattern = ";",simplify = T)) - features$m.z[i]
              data.table::set(mz_calibration_vals,as.integer(i),as.integer(1:length(sample_list)),value = as.list(c(calc_mz)))
            }else ###use MaxQ correction
            {
              calc_mz <- stats::aggregate(evidence_sub[indices,"Uncalibrated...Calibrated.m.z..Da."],by=list(Sample=evidence_sub$Raw.file[indices]),FUN="mean",na.rm=T)
              data.table::set(mz_calibration_vals,as.integer(i),as.integer(match(calc_mz$Sample,sample_list)),value = as.list(c(calc_mz$x)))
            }
          }
        }
        counter <- counter + 1
        updatecounter <- updatecounter + 1
        if(updatecounter >= 100)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(counter/max))*(1-(counter/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, counter, label=base::paste( round(counter/max*100, 0)," % done (",counter,"/",max,", Time require: ",time_require,")",sep = ""))
        }

      }
      close(pb)

      RT_calibration_vals_save <- RT_calibration_vals

      if(mz_calibration == T & use_mz_at_max_int_for_correction==F)
      {
        ####train random forest model for predicting mz calibration

        #suppressWarnings(suppressMessages(library(randomForest,quietly = T)))

        ###train a model per sample
        models <- list()
        trainsets <- list()
        evalsets <- list()
        if(any(colnames(evidence) == "Resolution"))
        {
          temp_data <- evidence[which(rowSums(is.na(evidence[,c("Raw.file","Retention.time","m.z","Charge","Resolution","Uncalibrated...Calibrated.m.z..Da.")])) == 0),c("Raw.file","Retention.time","m.z","Charge","Resolution","Uncalibrated...Calibrated.m.z..Da.")]
          temp_data$Resolution <- 0
        }else
        {
          temp_data <- evidence[which(rowSums(is.na(evidence[,c("Raw.file","Retention.time","m.z","Charge","Uncalibrated...Calibrated.m.z..Da.")])) == 0),c("Raw.file","Retention.time","m.z","Charge","Uncalibrated...Calibrated.m.z..Da.")]
          temp_data$Resolution <- 0
        }

        print(paste0(Sys.time()," Train RF models for mz-corrections"))
        for(s in sample_list)
        {
          selec_rows <- which(temp_data$Raw.file == s)
          temp_data2 <- temp_data[selec_rows,]
          random_selection <- sample(1:nrow(temp_data2),size = 0.8*nrow(temp_data2),replace = F)
          trainset <- temp_data2[random_selection,]
          evalset <- temp_data2[-random_selection,]

          if(length(unique(trainset[,"Uncalibrated...Calibrated.m.z..Da."]))>5)
          {
            model <- randomForest::randomForest(trainset[,c("Retention.time","m.z","Charge","Resolution")],trainset[,"Uncalibrated...Calibrated.m.z..Da."] , importance = TRUE,ntree = 500, mtry = 4,do.trace=F,nodesize = 100)
            evalset$predicted <- stats::predict(model, evalset, type = "response")
            trainset$predicted <- stats::predict(model, trainset, type = "response")
            models[[s]] <- model
            trainsets[[s]] <- trainset
            evalsets[[s]] <- evalset

            graphics::plot(trainset$predicted,trainset$Uncalibrated...Calibrated.m.z..Da.,main=base::paste(s,"- Train set (80% of data)"),xlab="Predicted m/z calibration",ylab="MaxQ determined m/z calibration")
            fit <- stats::lm(trainset$Uncalibrated...Calibrated.m.z..Da.~trainset$predicted)
            graphics::abline(fit)
            posx <- graphics::par("usr")[1] + (graphics::par("usr")[2]-graphics::par("usr")[1])*0.8
            posy <- graphics::par("usr")[3] + (graphics::par("usr")[4]-graphics::par("usr")[3])*0.2
            graphics::text(posx,posy,base::paste("R^2:",round(as.numeric(summary(fit)[8]),digits=2)))
            fit_train <- fit

            graphics::plot(evalset$predicted,evalset$Uncalibrated...Calibrated.m.z..Da.,main=base::paste(s,"- Validation set (20% of data)"),xlab="Predicted m/z calibration",ylab="MaxQ determined m/z calibration")
            fit <- stats::lm(evalset$Uncalibrated...Calibrated.m.z..Da.~evalset$predicted)
            graphics::abline(fit)
            posx <- graphics::par("usr")[1] + (graphics::par("usr")[2]-graphics::par("usr")[1])*0.8
            posy <- graphics::par("usr")[3] + (graphics::par("usr")[4]-graphics::par("usr")[3])*0.2
            graphics::text(posx,posy,base::paste("R^2:",round(as.numeric(summary(fit)[8]),digits=2)))
            fit_eval <- fit

            print(base::paste(s,": R^2 train-set=",round(as.numeric(summary(fit_train)[8]),digits=2),", R^2 eval-set=",round(as.numeric(summary(fit_eval)[8]),digits=2),sep=""))
          }
        }

        QC_data[["mz_calibration_models"]] <- models
        QC_data[["mz_calibration_train_sets"]] <- trainsets
        QC_data[["mz_calibration_evaluation_sets"]] <- evalsets

        ####now predict mz for missing mzcalibrations of features

        ####get ion indices per feature
        all_ion_indices <- base::as.data.frame(matrix(ncol=6,nrow=nrow(allpeptides)))
        colnames(all_ion_indices) <- c("Feature","Raw.file","Retention.time","m.z","Charge","Resolution")
        all_ion_indices$Feature <- as.numeric(all_ion_indices$Feature)
        all_ion_indices$Raw.file <- as.character(all_ion_indices$Raw.file)
        all_ion_indices$Retention.time <- as.numeric(all_ion_indices$Retention.time)
        all_ion_indices$m.z <- as.numeric(all_ion_indices$m.z)
        all_ion_indices$Charge <- as.integer(all_ion_indices$Charge)
        all_ion_indices$Resolution <- as.numeric(all_ion_indices$Resolution)
        cur_index = 1

        max <- nrow(features)
        pb <- tcltk::tkProgressBar(title = "Prepare features for m/z-calibration prediction by RF",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0
        for(i in 1:nrow(features))
        {
          ###start with extracting ion_indices per feature
          cur_ion_indices <- as.numeric(stringr::str_split(features$ion_indices[i],",",simplify = T))
          data.table::set(all_ion_indices,i = as.integer(cur_index:(cur_index-1+length(cur_ion_indices))),1L,i)
          data.table::set(all_ion_indices,i = as.integer(cur_index:(cur_index-1+length(cur_ion_indices))),2L,allpeptides[cur_ion_indices,"Raw.file"])
          data.table::set(all_ion_indices,i = as.integer(cur_index:(cur_index-1+length(cur_ion_indices))),as.integer(3:6),value = as.list(allpeptides[cur_ion_indices,c("Retention.time","m.z","Charge","Resolution")]))

          cur_index <- cur_index+length(cur_ion_indices)

          updatecounter <- updatecounter + 1
          if(updatecounter >= 100)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }
        }
        close(pb)

        all_ion_indices <- all_ion_indices[1:(cur_index-1),]

        all_ion_indices$Resolution <- 0

        raw_files <- sample_list

        ####get mean ion propperties per feature to predict m/z calibration per feature for each sample
        median_feature_properties <- all_ion_indices[,-2]#[which(all_ion_indices$Raw.file == raw_files[c]),-2]

        ###determine mean values per feature
        median_feature_properties <- stats::aggregate(median_feature_properties[,-1],by=list(Feature=median_feature_properties$Feature),FUN="median",na.rm=T)

        QC_data[["mz_calibration_median_feature_properties"]] <- median_feature_properties

        max <- ncol(mz_calibration_vals)
        pb <- tcltk::tkProgressBar(title = "Predict mz calibrations using RandomForest models",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0

        for(c in 1:ncol(mz_calibration_vals))
        {
          select_model <- which(names(models) == raw_files[c])

          if(length(select_model)>0)
          {
            features_select <- which(is.na(mz_calibration_vals[,c]))

            temp_data <- median_feature_properties[which(median_feature_properties$Feature %in% features_select),]

            temp_data <- temp_data[which(rowSums(is.na(temp_data[,c("Retention.time","m.z","Charge","Resolution")])) == 0),]

            if(nrow(temp_data)> 0)
            {
              prediction <- stats::predict(models[[select_model]], temp_data[,-1], type = "response")

              data.table::set(mz_calibration_vals,as.integer(temp_data$Feature),as.integer(c),value = prediction)
            }
          }
          updatecounter <- updatecounter + 1
          if(updatecounter >= 1)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(c/max))*(1-(c/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, c, label=base::paste( round(c/max*100, 0)," % done (",c,"/",max,", Time require: ",time_require,")",sep = ""))
          }
        }
        close(pb)
      }

      # }else if(mz_calibration == T & use_mz_at_max_int_for_correction==T) ###use observed mz at max intensity calibration
      # {
      #   ###get rowMedians of mz_calibrations
      #   feature_specific_correction_factor <- matrixStats::rowMedians(as.matrix(mz_calibration_vals),na.rm=T)
      #   ###examine deviation of expected feature_specific shift from observed shift
      #   expected_from_observed <- mz_calibration_vals/feature_specific_correction_factor
      #   ###sample specific shift
      #   sample_specific_correction_factor <- matrixStats::colMedians(log2(as.matrix(expected_from_observed)),na.rm=T)
      #   ###examine deviation of expected feature_specific shift from observed shift
      #   expected_from_observed <- mz_calibration_vals/((2^sample_specific_correction_factor)*feature_specific_correction_factor)
      #   ###plot per sample how well expected mz correction correlates with observed correction
      #   for(c in 1:length(sample_list))
      #   {
      #     sel <- which(abs(mz_calibration_vals[,c]) < 0.01)
      #     graphics::smoothScatter(mz_calibration_vals[sel,c],(2^sample_specific_correction_factor[c])*feature_specific_correction_factor[sel],xlim=c(-0.005,0.005),ylim=c(-0.005,0.005),main=sample_list[c],xlab="Observed mz - feature mz",ylab="Expected mz - feature mz")
      #     graphics::abline(a=0,b=1)
      #     fit <- stats::lm((2^sample_specific_correction_factor[c])*feature_specific_correction_factor[sel]~mz_calibration_vals[sel,c])
      #     graphics::abline(fit,lty=2)
      #     graphics::text(0,0.004,base::paste("Rsq:",round(as.numeric(summary(fit)[8]),digits=2)))
      #   }
      #   ###store calibration factor
      #   QC_data[["mz_calibration_feature_specific_cor_factor"]] <- feature_specific_correction_factor
      #   QC_data[["mz_calibration_sample_specific_cor_factor"]] <- sample_specific_correction_factor
      #
      #   ###replace missing values with expected correction factors
      #   for(c in 1:ncol(mz_calibration_vals))
      #   {
      #     selection <- which(is.na(mz_calibration_vals[,c]))
      #     mz_calibration_vals[,c][selection] <- ((2^sample_specific_correction_factor[c])*feature_specific_correction_factor[selection])
      #   }
      # }

      #if there are still missing values just fill with 0 or expected SILAC shifts
      if(any(is.na(mz_calibration_vals)))
      {
        mz_calibration_vals[is.na(mz_calibration_vals)] <- 0
      }

      if(RT_calibration == T)
      {
        graphics::plot(1, type="n", axes=F, xlab="", ylab="")
        graphics::text(1,1,"RT GAM fitting - First round of estimation")

        RT_alignment_GAM_models <- list()
        ###fit GAM and predict per sample.
        for(c in 1:ncol(RT_calibration_vals))
        {
          x <- features$RT[which(RT_calibration_vals[,c] != 0)]
          y <- RT_calibration_vals[,c][which(RT_calibration_vals[,c] != 0)]
          if(length(x) > 10) ###require at least 500 data points to perform fitting
          {
            ##try to fit an average generalised additive model to determine a RT dependent calibration curve
            gam <- mgcv::gam(y ~ s(x), method = "REML")
            RT_alignment_GAM_models[[c]] <- gam
            x_pred <- seq(min(features$RT,na.rm=T), max(features$RT,na.rm=T), length.out = nrow(features))
            y_pred <- stats::predict(gam, base::data.frame(x = x_pred))
            lim_stats <- grDevices::boxplot.stats(y_pred)
            delta_y <- lim_stats$stats[4] - lim_stats$stats[2]
            ylim <- c(-max(abs(c(lim_stats$stats[2],lim_stats$stats[4])))-delta_y,max(abs(c(lim_stats$stats[2],lim_stats$stats[4])))+delta_y)
            if(ylim[2] < 2)ylim <- c(-2,2)
            graphics::smoothScatter(x,y,ylab="Observed RT calibration",main=sample_list[c],xlab="RT [min]",ylim=ylim)
            graphics::lines(x_pred,y_pred,col="red")
            graphics::legend("topright",legend="GAM",lty=1,col="red")

            ##predict RT correction for all features in current sample
            y_pred <- stats::predict(gam, base::data.frame(x = features$RT))

            ###use GAM to predict RT correction for missing features in current sample
            selection <- which(is.na(RT_calibration_vals[,c]))
            RT_calibration_vals[,c][selection] <- y_pred[selection]

          }else ###not enough observations ... skip
          {
            print(base::paste(sample_list[c],"- Not enough peptide observations for RT-GAM fitting..."))
          }
        }
        QC_data[["RT_calibration_GAM_models"]] <- RT_alignment_GAM_models

        #perform a second round of RT calibration
        #currently it is possible that several features are observed only in a few samples
        #hence observed median RT is driven by these few samples
        #based on GAM models adjust median feature RT and rerun fitting
        expected_raw_RTs <- features$RT + RT_calibration_vals
        expected_raw_RTs[is.na(expected_raw_RTs)] <- 0

        delta_rt_temp <- features$RT_range_max-features$RT
        features$RT <- matrixStats::rowMedians(as.matrix(expected_raw_RTs),na.rm=T)
        features$RT_range_min <- features$RT - delta_rt_temp
        features$RT_range_max <- features$RT + delta_rt_temp

        #restart RT calibration
        #grap known RT once again
        RT_calibration_vals <- RT_calibration_vals_save

        graphics::plot(1, type="n", axes=F, xlab="", ylab="")
        graphics::text(1,1,"RT GAM fitting - Second round of estimation")

        RT_alignment_GAM_models <- list()
        ###fit GAM and predict per sample.
        for(c in 1:ncol(RT_calibration_vals))
        {
          x <- features$RT[which(RT_calibration_vals[,c] != 0)]
          y <- RT_calibration_vals[,c][which(RT_calibration_vals[,c] != 0)]
          if(length(x) > 10) ###require at least 500 data points to perform fitting
          {
            ##try to fit an average generalised additive model to determine a RT dependent calibration curve
            gam <- mgcv::gam(y ~ s(x), method = "REML")
            RT_alignment_GAM_models[[c]] <- gam
            x_pred <- seq(min(features$RT,na.rm=T), max(features$RT,na.rm=T), length.out = nrow(features))
            y_pred <- stats::predict(gam, base::data.frame(x = x_pred))
            lim_stats <- grDevices::boxplot.stats(y_pred)
            delta_y <- lim_stats$stats[4] - lim_stats$stats[2]
            ylim <- c(-max(abs(c(lim_stats$stats[2],lim_stats$stats[4])))-delta_y,max(abs(c(lim_stats$stats[2],lim_stats$stats[4])))+delta_y)
            if(ylim[2] < 2)ylim <- c(-2,2)
            graphics::smoothScatter(x,y,ylab="Observed RT calibration",main=sample_list[c],xlab="RT [min]",ylim=ylim)
            graphics::lines(x_pred,y_pred,col="red")
            graphics::legend("topright",legend="GAM",lty=1,col="red")

            ##predict RT correction for all features in current sample
            y_pred <- stats::predict(gam, base::data.frame(x = features$RT))

            ###use GAM to predict RT correction for missing features in current sample
            selection <- which(is.na(RT_calibration_vals[,c]))
            RT_calibration_vals[,c][selection] <- y_pred[selection]
          }else ###not enough observations ... skip
          {
            print(base::paste(sample_list[c],"- Not enough peptide observations for RT-GAM fitting..."))
          }
        }
        QC_data[["RT_calibration_GAM_models"]] <- RT_alignment_GAM_models
      }
      RT_calibration_vals[is.na(RT_calibration_vals)] <- 0

      if(MassSpec_mode == "TIMSToF" & IM_calibration == T)
      {
        ###use median of IM_calibration per sample for NAs
        for(c in 1:ncol(IM_calibration_vals))
        {
          IM_calibration_vals[,c][is.na(IM_calibration_vals[,c])] <- stats::median(IM_calibration_vals[,c],na.rm=T)
        }
      }

    }else ###no calibration should be done
    {
      RT_calibration_vals[is.na(RT_calibration_vals)] <- 0

      mz_calibration_vals[is.na(mz_calibration_vals)] <- 0

      if(MassSpec_mode == "TIMSToF")IM_calibration_vals[is.na(IM_calibration_vals)] <- 0
    }

    features <- cbind(features,RT_calibration_vals,mz_calibration_vals)
    if(MassSpec_mode == "TIMSToF")features <- cbind(features,IM_calibration_vals)

    utils::write.table(features,base::paste("Temporary_files/Features_aligned_merged",output_file_names_add,".txt",sep=""),row.names = F)

    save(QC_data,file = base::paste("Temporary_files/Feature_alignment_QC_data.RData",sep=""))

    grDevices::dev.off()
    options(warn=0)
    crap <- gc(F)
  }
}

#' Extend IceR features by expected +1-isotopic features
#' @param path_to_features Path to folder where results of align_features() are stored.
#' @param feature_table_file_name File name which contains align_features() results. By default is set to Features_aligned_merged_IceR_analysis.txt.
#' @param min_observations Specifying how many MaxQuant features had to be previously detected for monoisotopic IceR features to be extended by +1-isotopic features. By default set to 0 indicating that all for all monoisotopic IceR features a +1-isotopic feature is added.
#' @details Optional step of the IceR workflow appending the list of IceR features by respective expected +1-isotopic features. Isotopic features are expected to show an m/z shift by +1.002 Da/z relative to the monoisotopic IceR feature. Only IceR features with at least min_observations and with mean Andromeda score > 25 % quantile of all mean Andromeda scores are considered.
#' @return Extended list ist stored in the specified feature_table_file_name.
#' @export
add_isotope_features <- function(path_to_features,feature_table_file_name="Features_aligned_merged_IceR_analysis.txt",min_observations=0)
{
  setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
  features <- utils::read.table(feature_table_file_name,header = T)
  setwd(path_to_features)

  if(!any(grepl("_i",features$Feature_name)))
  {
    ###isotope + 1 features
    isotope_features <- features
    isotope_features <- isotope_features[which(!grepl(";",isotope_features$Sequence)),]
    isotope_features <- isotope_features[which(as.numeric(as.character(isotope_features$num_matches)) >= min_observations),]

    if(nrow(isotope_features)>0)
    {
      quantile25_score <- grDevices::boxplot.stats(as.numeric(as.character(isotope_features$mean_Scores)))$stats[2]
      isotope_features <- isotope_features[which(as.numeric(as.character(isotope_features$mean_Scores)) >= quantile25_score),]
      isotope_features$m.z <- ((isotope_features$m.z*isotope_features$Charge)+1.002054)/isotope_features$Charge
      delta_mz <- isotope_features$m.z_range_max-isotope_features$m.z_range_min
      isotope_features$m.z_range_max <- isotope_features$m.z+(delta_mz/2)
      isotope_features$m.z_range_min <- isotope_features$m.z-(delta_mz/2)
      isotope_features$Feature_name <- base::paste(isotope_features$Feature_name,"_i",sep="")
      features <- rbind(features,isotope_features)
      utils::write.table(features,base::paste("Temporary_files/",feature_table_file_name,sep=""),row.names = F)
      print(paste0(Sys.time()," Added ",nrow(isotope_features)," isotope features."))
    }else
    {
      print(base::paste(Sys.time()," None of the peptides was observed in at least",min_observations,"samples. No isotope features were added."))
    }
  }else
  {
    print(paste0(Sys.time()," Isotope features were already added"))
  }
}

#' Perform quantification of IceR features
#' @param path_to_features Path to folder where results of align_features() are stored
#' @param path_to_mzXML Path to folder containing mzXML files of samples in case of Orbitrap data.
#' @param path_to_MaxQ_output Path to folder containing MaxQuant outputs (txt folder containing at least allpeptides.txt, evidence.txt, peptides.txt and proteinGroups.txt)
#' @param feature_table_file_name File name which contains align_features() results. By default is set to Features_aligned_merged_IceR_analysis.txt.
#' @param output_file_names_add IceR result name tag. By default IceR_analysis
#' @param RT_calibration Boolean value indicating if corrected RT should be used during peak detection, selection and DICE, By default set to T.
#' @param mz_calibration Boolean value indicating if corrected m/z should be used during peak detection, selection and DICE, By default set to T.
#' @param abundance_estimation_correction Boolean value indicating if resulting peptide abundances should be corrected using MaxQuant results as a reference. By default set to T.
#' @param Quant_pVal_cut Numeric value used as diagnostic cutoff border for visualization of significances of ion accumulation per IceR feature quantification. Furthermore, used as cutoff to filter +1-isotopic IceR features with significant accumulation of ions. By default set to 0.05.
#' @param n_cores Numeric value specifying on how many CPU cores tasks should be parallelized. By default set to 2.
#' @param kde_resolution Numeric value specifying number of grid points per dimension. By default set to 50.
#' @param num_peaks_store Numeric value specifying number of 2D peaks to be stored during peak detection. By default set to 5.
#' @param plot_peak_detection Boolean value indicating if for every feature quantification the determined kernel density estimations and detected peaks should be visualized and stored. By default set to F.
#' @param alignment_variability_score_cutoff Numeric value specifying significance cutoff to distinguish which features show high RT- or m/z-variability of selected peaks between samples. By default set to 0.05. All features showing significant general variability (variability score < alignment_variability_score_cutoff) are excluded.
#' @param alignment_scores_cutoff Numeric value specifying significance cutoff to distinguish which samples show high RT- or m/z-variability of selected peaks for respective IceR feature. By default set to 0.05. All samples showing significant peak variability for respective IceR feature (variability score < alignment_scores_cutoff) are excluded (quantification set to NA).
#' @param mono_iso_alignment_cutoff Numeric value specifying significance cutoff to distinguish which samples show high RT- or m/z-variability of selected peaks for +1-isotopic from corresponding monoisotopic IceR feature. By default set to 0.05. All samples showing significant peak variability between selected +1-isotopic and monoisotopic IceR features (variability score < mono_iso_alignment_cutoff) are excluded (quantification of +1-isotopic feature set to NA).
#' @param calc_peptide_LFQ Boolean value specifying if multiply peptide quantification data for same peptide sequence (multiply charge states, isotope-states) should be aggregated using the MaxLFQ algorithm. By default set to F.
#' @param calc_protein_LFQ Boolean value specifying if protein quantification should be additionally performed by peptide quantification aggregation using the MaxLFQ algorithm. By default set to T.
#' @param MassSpec_mode String being either "Orbitrap" or "TIMSToF" specifying by which type of Mass Spectrometer the data was generated. By default it expects Thermo Orbitrap data.
#' @param use_IM_data Boolean value indicating if ion mobility information should be used during feature quantification in case of TIMS-ToF data. By default set to T.
#' @param path_to_extracted_spectra Path to folder containing extracted spectra files of samples in case of TIMS-ToF data.
#' @import foreach
#' @export
#' @details Performs final steps of the IceR workflow:
#' 1) Estimation of background noise per IceR feature quantification.
#' 2) 2D Kernel density estimation-based peak detection, selection and DICE-based quantification of IceR features.
#' 3) Determination of significances of ion accumulations per IceR feature quantification.
#' 4) Quality control of peak selections.
#' 5) IceR peak selection accuracy estimations per sample.
#' 6) Optional: Imputation of missing IceR feature quantifications using estimated backgroudn noise models per sample.
#' 7) Protein quantification by aggregating available peptide quantifications.
#' @return Outputs are stored in the specified output folder and intermediate results are stored in the sub-directory Temporary_files.
#' Quantification results are stored in tab-delimited text files (.tab):
#' Feature information - Features_DDAiceR_Analysis.tab
#' Peak alignment scores - Features_quantification_alignment_score_DDAiceR_Analysis.tab
#' Feature quantifications - Features_quantification_DDAiceR_Analysis.tab
#' Feature quantifications after imputation - Features_quantification_imputed_DDAiceR_Analysis.tab
#' Numbers of observed ions per feature quantification - Features_quantification_ioncount_DDAiceR_Analysis.tab
#' Alignment scores between monoisotopic and corresponding +1isotopic IceR features - Features_quantification_mono_iso_alignment_score_DDAiceR_Analysis.tab
#' Significance of ion accumulations per feature quantification - Features_quantification_pvals_DDAiceR_Analysis.tab
#' Signal to background intensity ratios per feature quantification - Features_quantification_S2B_DDAiceR_Analysis.tab
#' General variability score of peak selections - Features_quantification_variability_score_DDAiceR_Analysis.tab
#' Protein quantification - Proteins_quantification_LFQ_DDAiceR_Analysis.tab
#' Protein quantification after imputation - Proteins_quantification_LFQ_imputed_DDAiceR_Analysis.tab
#' QC results of background noise estimations are visualized in "Decoy feature quantification parameters.pdf".
#' QC results of peak selections are visualized in "Alignment and quantification scores.pdf".
#' The performance of RT- and m/z-alignments over samples is visualized in "Performance of feature alignment.pdf"
#' Estimation of required peptide abundance correction factors is visualized in "Correct feature abundance estimations Signal_Background_intensity.pdf".
#' Intermediate results of the function are stored in RData files.
requantify_features <- function(path_to_features,path_to_mzXML=NA,path_to_MaxQ_output,feature_table_file_name="Features_aligned_merged_IceR_analysis.txt",output_file_names_add="IceR_analysis",RT_calibration=T,mz_calibration=T,abundance_estimation_correction = T,Quant_pVal_cut=0.05,n_cores=2,kde_resolution = 50,num_peaks_store = 5,plot_peak_detection=F,alignment_variability_score_cutoff=0.05,alignment_scores_cutoff=0.05,mono_iso_alignment_cutoff=0.05,calc_peptide_LFQ=F,calc_protein_LFQ=T,MassSpec_mode=c("Orbitrap","TIMSToF"),use_IM_data = T,path_to_extracted_spectra=NA)
{

  options(warn=-1)
  #suppressWarnings(suppressMessages(library(mgcv,quietly = T)))
  multiply_intensity_count=F
  peak_detection=T

  QC_data <- list() ##here relevant qc data is stored and finally saved as RData which can be used for re-generating plots

  mean_background_intensity <- NA
  sd_background_intensity <- NA
  n_background_intensity <- NA

  setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
  features <- utils::read.table(feature_table_file_name,header = T)
  setwd(path_to_features)

  ###Check peak widths and increase very small peak widths while reduce extreme long outlier peak widths
  stats <- grDevices::boxplot.stats(features$RT_length)
  features$RT_length[which(features$RT_length < stats$stats[2])] <- stats$stats[2] ##lower than 25 % quantile
  features$RT_length[which(features$RT_length > stats$stats[5])] <- stats$stats[5] ##if RT length > upper whisker --> shorten to upper whisker
  features$RT_length[is.na(features$RT_length)] <- stats$stats[3]

  if(MassSpec_mode == "TIMSToF")
  {
    stats <- grDevices::boxplot.stats(features$Inv_K0_length)
    features$Inv_K0_length[which(features$Inv_K0_length < stats$stats[2])] <- stats$stats[2] ##lower than 25 % quantile
    features$Inv_K0_length[which(features$Inv_K0_length > stats$stats[5])] <- stats$stats[5] ##if IM length > upper whisker --> shorten to upper whisker
    features$Inv_K0_length[is.na(features$Inv_K0_length)] <- stats$stats[3]
  }

  ###Add decoy features
  features_decoy <- features[which(!grepl("_pmp|_i",features$Feature_name)),]
  features_decoy$Feature_name <- base::paste(features_decoy$Feature_name,"_d",sep="")

  RT_ranges <- features_decoy$RT_range_max-features_decoy$RT_range_min
  mz_ranges <- features_decoy$m.z_range_max-features_decoy$m.z_range_min
  median_RT_window <- stats::median(features_decoy$RT_range_max-features_decoy$RT_range_min,na.rm=T)
  median_mz_window <- stats::median(features_decoy$m.z_range_max-features_decoy$m.z_range_min,na.rm=T)

  features_decoy$RT <- features_decoy$RT + 5*median_RT_window
  features_decoy$m.z <- features_decoy$m.z + 5*median_mz_window

  features_decoy$m.z_range_min <- features_decoy$m.z-(mz_ranges/2)
  features_decoy$m.z_range_max <- features_decoy$m.z+(mz_ranges/2)

  features_decoy$RT_range_min <- features_decoy$RT-(RT_ranges/2)
  features_decoy$RT_range_max <- features_decoy$RT+(RT_ranges/2)

  ###adjust observed RT and mz accordingly
  Observed_RT <- as.matrix(stringr::str_split(features_decoy$Observed_RT,";",simplify = T))
  class(Observed_RT) <- "numeric"
  Observed_RT <- Observed_RT + 5*median_RT_window
  features_decoy$Observed_RT <- apply(Observed_RT, 1, base::paste, collapse=";")

  Observed_mz <- as.matrix(stringr::str_split(features_decoy$Observed_mz,";",simplify = T))
  class(Observed_mz) <- "numeric"
  Observed_mz <- Observed_mz + 5*median_mz_window
  features_decoy$Observed_mz <- apply(Observed_mz, 1, base::paste, collapse=";")


  if(MassSpec_mode == "TIMSToF")
  {
    IM_ranges <- features_decoy$Inv_K0_range_max-features_decoy$Inv_K0_range_min
    median_IM_window <- stats::median(IM_ranges,na.rm=T)
    features_decoy$Inv_K0 <- features_decoy$Inv_K0 + 5*median_IM_window

    Observed_IM <- as.matrix(stringr::str_split(features_decoy$Observed_IM,";",simplify = T))
    class(Observed_IM) <- "numeric"
    Observed_IM <- Observed_IM + 5*median_IM_window
    features_decoy$Observed_IM <- apply(Observed_IM, 1, base::paste, collapse=";")
  }

  features <- rbind(features,features_decoy)

  if(output_file_names_add != "")output_file_names_add <- base::paste("_",output_file_names_add,sep="")

  ##Step 1 - Summarize ion intensities per aligned MaxQuant feature
  ###Function to perform extraction on multiple threads. Depending on the available ram, the extraction can be run on several threads. However, the task is using much memory so that it is recommended to just run 2 threads in parallel if only 16 gb of ram are available.
  extract_intensities_worker <- function(Sample_IDs,features_select,path_to_raw,path_to_output_folder,RT_calibration,mz_calibration,peak_detection,n_cores,ion_intensity_cutoff=F,mean_background_ion_intensity_model = NA,sd_background_ion_intensity = NA,peak_min_ion_count=NA,kde_resolution=25,num_peaks_store=5,plots=F,MassSpec_mode="Orbitrap",use_IM_data=T)
  {
    #suppressWarnings(suppressMessages(library(doParallel,quietly = T)))

    ####Function to extract intensities in respective extracted .RData for a list of selected features (or all features).
    ####indexing_RT_window defines how large each RT indexing window is to speed up subsetting. 0.5 min was observed to be good
    get_intensities <- function(Sample_ID,path,features_select,indexing_RT_window=0.1,RT_calibration,mz_calibration,peak_detection,ion_intensity_cutoff,mean_background_ion_intensity_model,sd_background_ion_intensity,peak_min_ion_count,kde_resolution,num_peaks_store,plots,MassSpec_mode,use_IM_data)
    {
      #suppressWarnings(suppressMessages(library(lubridate,quietly = T)))
      #suppressWarnings(suppressMessages(library(stringr,quietly = T)))

      if(any(!is.na(mean_background_ion_intensity_model)))
      {
        colnames(mean_background_ion_intensity_model) <- base::gsub("-",".",colnames(mean_background_ion_intensity_model))
        names(sd_background_ion_intensity) <- base::gsub("-",".",names(sd_background_ion_intensity))
      }

      ###if no peak selection should be performed, extract all ions within the expected feature windows
      feature_no_peak_selection <- function(all_ion_data,selected_features,cur_sample,include_isotope_patterns = F,num_peaks_store=0,MassSpec_mode="Orbitrap",use_IM_data=T)
      {
        peak_ion_data_list <- list()

        ###define RT and mz window
        RT_expected <- selected_features$RT + selected_features[,base::paste("RT_calibration.",cur_sample,sep="")]
        mz_expected <- selected_features$m.z + selected_features[,base::paste("mz_calibration.",cur_sample,sep="")]

        RT_window <- c(RT_expected - (selected_features$RT_length/2),
                       RT_expected + (selected_features$RT_length/2))

        mz_window <- c(selected_features$m.z_range_min + selected_features[,base::paste("mz_calibration.",cur_sample,sep="")],
                       selected_features$m.z_range_max + selected_features[,base::paste("mz_calibration.",cur_sample,sep="")])

        if(MassSpec_mode == "TIMSToF")
        {
          im_expected <- selected_features$Inv_K0 + selected_features[,base::paste("IM_calibration.",cur_sample,sep="")]

          IM_window <- c(im_expected - (selected_features$Inv_K0_length/2),
                         im_expected + (selected_features$Inv_K0_length/2))
        }

        if(MassSpec_mode == "Orbitrap")
        {
          ###select relevant ions
          ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window[1] &
                                           all_ion_data$m.z <= mz_window[2] &
                                           all_ion_data$RT >= RT_window[1] &
                                           all_ion_data$RT <= RT_window[2]),]
          ion_data <- stats::na.omit(cbind(ion_data,data.frame(isotope=0)))

          #ion_data$isotope=0

          ##add isotope ions if wanted
          if(include_isotope_patterns == T)
          {
            iso_mult <- (1:3)*1.002054
            cur_charge <- selected_features$Charge
            cur_mz_min = mz_window[1]
            cur_mz_max = mz_window[2]
            for(isos in c(1,2,3))###isotope +1,+2,+3
            {
              mz_range <- c(((cur_mz_min*cur_charge)+(iso_mult[isos]))/cur_charge,((cur_mz_max*cur_charge)+(iso_mult[isos]))/cur_charge)

              selection <- which(all_ion_data$m.z >= mz_range[1] &
                                   all_ion_data$m.z <= mz_range[2] &
                                   all_ion_data$RT >= RT_window[1] &
                                   all_ion_data$RT <= RT_window[2])
              if(length(selection)>0)
              {
                temp <- all_ion_data[selection,]
                temp$isotope=isos
                ion_data <- rbind(ion_data,temp)
              }else
              {
                break
              }
            }

            ion_data <- ion_data[order(ion_data$RT),]
            ###only keep isotope ions which are present in spectra where also the previous isotope ions was detected
            for(iso in c(1,2,3))
            {
              if(length(which(ion_data$isotope == iso))>0 & length(which(ion_data$isotope == iso-1))>0)
              {
                iso_current <- which(ion_data$isotope == iso)

                iso_minus1 <- ion_data[which(ion_data$isotope == iso-1),]

                remove <- which(ion_data$RT[iso_current] %not in% iso_minus1$RT)
                if(length(remove)>0)iso_current <- iso_current[remove]
                if(length(iso_current)>0)ion_data <- ion_data[-iso_current,]
              }else
              {
                remove <- which(ion_data$isotope == iso)
                if(length(remove)>0)ion_data <- ion_data[-remove,]
              }
            }

            ###correct m/z of +1,+2 and +3 isotope ions to its theoretical +0 isotope m/z
            ion_data$m.z <- ((ion_data$m.z*cur_charge)-(ion_data$isotope*1.002054))/cur_charge
          }

          peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data,
                                                                        Peak_info=base::data.frame(RT=RT_expected,
                                                                                             RT_window_lower=RT_window[1],
                                                                                             RT_window_upper=RT_window[2],
                                                                                             mz=mz_expected,
                                                                                             mz_window_lower=mz_window[1],
                                                                                             mz_window_upper=mz_window[2],
                                                                                             density=0,
                                                                                             known_peak=0,
                                                                                             Peak=num_peaks_store+1,
                                                                                             ion_count=nrow(ion_data)))
        }
        if(MassSpec_mode == "TIMSToF")
        {
          ###select relevant ions
          if(use_IM_data == T)
          {
            ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window[1] &
                                             all_ion_data$m.z <= mz_window[2] &
                                             all_ion_data$RT >= RT_window[1] &
                                             all_ion_data$RT <= RT_window[2] &
                                             all_ion_data$`1/K0` >= IM_window[1] &
                                             all_ion_data$`1/K0` <= IM_window[2]),]
          }else
          {
            ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window[1] &
                                             all_ion_data$m.z <= mz_window[2] &
                                             all_ion_data$RT >= RT_window[1] &
                                             all_ion_data$RT <= RT_window[2]),]
          }
          ion_data <- stats::na.omit(cbind(ion_data,data.frame(isotope=0)))

          #ion_data$isotope=0

          peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data,
                                                                        Peak_info=base::data.frame(RT=RT_expected,
                                                                                             RT_window_lower=RT_window[1],
                                                                                             RT_window_upper=RT_window[2],
                                                                                             mz=mz_expected,
                                                                                             mz_window_lower=mz_window[1],
                                                                                             mz_window_upper=mz_window[2],
                                                                                             density=0,
                                                                                             known_peak=0,
                                                                                             Peak=num_peaks_store+1,
                                                                                             ion_count=nrow(ion_data)))
          #IM=im_expected,
          #IM_window_lower=IM_window[1],
          #IM_window_upper=IM_window[2]))
        }

        names(peak_ion_data_list) <- "Standard"
        return(peak_ion_data_list)

      }

      ###if peak selection should be performed
      feature_2D_peak_selection <- function(all_ion_data,selected_features,cur_sample,known_RT,delta_mz,delta_rt,RT_window_expand_factor=5,IM_window_expand_factor=5,mz_window_expand_factor=4,include_isotope_patterns = F,n_raster_dens_matrix=50,local_maxima_k=3,max_delta_RT=2,max_delta_mz=0.005,peak_min_ion_count=5,RT_bw=0.5,mz_bw=0.002,num_peaks_store=5,plot=F,auto_adjust_kde_resolution=T,MassSpec_mode="Orbitrap",delta_im=0.001,close_peak_merging=F,use_IM_data=T)
      {
        #suppressWarnings(suppressMessages(library(MASS,quietly = T)))
        #suppressWarnings(suppressMessages(library(raster,quietly = T)))
        #suppressWarnings(suppressMessages(library(ggtern,quietly = T)))
        peak_ion_data_list <- list()

        graph <- NA ##here we store graphical output if wanted

        ###define RT and mz window
        RT_correction <- as.numeric(selected_features[base::paste("RT_calibration.",cur_sample,sep="")])
        mz_correction <- as.numeric(selected_features[base::paste("mz_calibration.",cur_sample,sep="")])

        RT_expected <- as.numeric(selected_features$RT+RT_correction)
        mz_expected <- as.numeric(selected_features$m.z+mz_correction)


        if(delta_rt > selected_features$RT_length/2) ###use standard window as long as the peak width is < standard RT window
        {
          RT_window <- c(selected_features$RT_range_min + RT_correction,
                         selected_features$RT_range_max + RT_correction)
        }else
        {
          RT_window <- c(selected_features$RT + RT_correction - (selected_features$RT_length/2),
                         selected_features$RT + RT_correction + (selected_features$RT_length/2))
        }

        mz_window <- c(selected_features$m.z_range_min + mz_correction,
                       selected_features$m.z_range_max + mz_correction)

        RT_window_expanded <- c(RT_window[1]-((delta_rt)*RT_window_expand_factor),
                                RT_window[2]+((delta_rt)*RT_window_expand_factor))

        if(RT_window_expanded[2]-RT_window_expanded[1] > 10) ###maximum 10 min deviation
        {
          RT_window_expanded[1] <- RT_window[1]-5
          RT_window_expanded[2] <- RT_window[2]+5
        }

        mz_window_expanded <- c(mz_window[1]-(delta_mz*mz_window_expand_factor),
                                mz_window[2]+(delta_mz*mz_window_expand_factor))

        if(MassSpec_mode == "TIMSToF")
        {
          IM_correction <- as.numeric(selected_features[base::paste("IM_calibration.",cur_sample,sep="")])
          IM_expected <- as.numeric(selected_features$Inv_K0+IM_correction)

          if(delta_im > selected_features$Inv_K0_length/2) ###use standard window as long as the peak width is < standard IM window
          {
            IM_window <- c(selected_features$Inv_K0_range_min + IM_correction,
                           selected_features$Inv_K0_range_max + IM_correction)
          }else
          {
            IM_window <- c(selected_features$Inv_K0 + IM_correction - (selected_features$Inv_K0_length/2),
                           selected_features$Inv_K0 + IM_correction + (selected_features$Inv_K0_length/2))
          }
          IM_window_expanded <- c(IM_window[1]-(delta_im*IM_window_expand_factor),
                                  IM_window[2]+(delta_im*IM_window_expand_factor))
        }


        ###filter for ions in the expanded window
        if(MassSpec_mode == "Orbitrap")
        {
          ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window_expanded[1] &
                                           all_ion_data$m.z <= mz_window_expanded[2] &
                                           all_ion_data$RT >= RT_window_expanded[1] &
                                           all_ion_data$RT <= RT_window_expanded[2]),]
        }else
        {
          if(use_IM_data == T)
          {
            ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window_expanded[1] &
                                             all_ion_data$m.z <= mz_window_expanded[2] &
                                             all_ion_data$RT >= RT_window_expanded[1] &
                                             all_ion_data$RT <= RT_window_expanded[2] &
                                             all_ion_data$`1/K0` >= IM_window[1] &
                                             all_ion_data$`1/K0` <= IM_window[2]),]
          }else
          {
            ion_data <- all_ion_data[which(all_ion_data$m.z >= mz_window_expanded[1] &
                                             all_ion_data$m.z <= mz_window_expanded[2] &
                                             all_ion_data$RT >= RT_window_expanded[1] &
                                             all_ion_data$RT <= RT_window_expanded[2]),]
          }
        }

        ion_data <- stats::na.omit(cbind(ion_data,data.frame(isotope=0)))
        #ion_data$isotope=0

        ##add isotope ions if wanted
        if(include_isotope_patterns == T & nrow(ion_data)>0)
        {
          iso_mult <- (1:3)*1.002054

          cur_charge <- selected_features$Charge
          cur_mz_min = mz_window_expanded[1]
          cur_mz_max = mz_window_expanded[2]
          for(isos in c(1,2,3))###isotope +1,+2,+3
          {
            mz_range <- c(((cur_mz_min*cur_charge)+(iso_mult[isos]))/cur_charge,((cur_mz_max*cur_charge)+(iso_mult[isos]))/cur_charge)

            selection <- which(all_ion_data$m.z >= mz_range[1] &
                                 all_ion_data$m.z <= mz_range[2] &
                                 all_ion_data$RT >= RT_window_expanded[1] &
                                 all_ion_data$RT <= RT_window_expanded[2])
            if(length(selection)>0)
            {
              temp <- all_ion_data[selection,]
              temp$isotope=isos
              ion_data <- rbind(ion_data,temp)
            }else
            {
              break
            }
          }

          ion_data <- ion_data[order(ion_data$RT),]
          ###only keep isotope ions which are present in spectra where also the previous isotope ions was detected
          for(iso in c(1,2,3))
          {
            if(length(which(ion_data$isotope == iso))>0 & length(which(ion_data$isotope == iso-1))>0)
            {
              iso_current <- which(ion_data$isotope == iso)

              iso_minus1 <- ion_data[which(ion_data$isotope == iso-1),]

              remove <- which(ion_data$RT[iso_current] %not in% iso_minus1$RT)
              if(length(remove)>0)iso_current <- iso_current[remove]
              if(length(iso_current)>0)ion_data <- ion_data[-iso_current,]
            }else
            {
              remove <- which(ion_data$isotope == iso)
              if(length(remove)>0)ion_data <- ion_data[-remove,]
            }
          }

          ###correct m/z of +1,+2 and +3 isotope ions to its theoretical +0 isotope m/z
          ion_data$m.z <- ((ion_data$m.z*cur_charge)-(ion_data$isotope*1.002054))/cur_charge
        }

        if(nrow(ion_data) >= peak_min_ion_count)
        {
          ###determine 2D density matrix
          #consider different resolutions required for peaks with expected small RT lengths
          if(auto_adjust_kde_resolution==T)
          {
            required_res <- ceiling(((RT_window_expanded[2]-RT_window_expanded[1])/((selected_features$RT_length/1.9)*2))+1)
            if(required_res > n_raster_dens_matrix)n_raster_dens_matrix <- required_res
          }

          f2 <- MASS::kde2d(ion_data$RT, ion_data$m.z, n = n_raster_dens_matrix,
                      h = c(RT_bw, mz_bw),lims = c(RT_window_expanded[1],RT_window_expanded[2],mz_window_expanded[1],mz_window_expanded[2]))

          #area_per_cell <- ((max(f2$x)-min(f2$x)))*((max(f2$y)-min(f2$y)))/2

          ###determine which density (=z) will be selected to be at least exceeded --> upper whisker
          outlier_densities <- grDevices::boxplot.stats(f2$z)$stats[5]

          ## Convert it to a raster object
          r <- raster::raster(f2$z)
          raster::extent(r) <- raster::extent(c(0, length(f2$x), 0, length(f2$y)) + 0.5)

          ## Find the maximum value within the k-cell neighborhood of each cell
          f <- function(X) max(X, na.rm=TRUE)
          ww <- matrix(1, nrow=local_maxima_k, ncol=local_maxima_k) ## Weight matrix for cells in moving window
          localmax <- raster::focal(r, fun=f, w=ww, pad=TRUE, padValue=NA)

          ## Get x-y coordinates of those cells that are local maxima
          r2 <- r==localmax
          maxXY <- raster::xyFromCell(r2, raster::Which(r2==1, cells=TRUE))

          ## Remove maxima which are below the minimal outlier density
          maxima <- base::data.frame(RT=f2$x[n_raster_dens_matrix-maxXY[,2]+1],mz=f2$y[maxXY[,1]],density=f2$z[((maxXY[,1]-1)*n_raster_dens_matrix)+(n_raster_dens_matrix-maxXY[,2]+1)])
          maxima <- maxima[which(maxima$density > outlier_densities),]
          ###next filter for maxima with at least peak_min_ion_count ions
          maxima$estimated_count <- (maxima$density/sum(as.matrix(maxima$density)))*nrow(ion_data)

          RT_cut <- selected_features$RT_length/2#ifelse(!is.na(known_RT),selected_features$RT_length/2,delta_rt)
          selection <- which(maxima$estimated_count >= peak_min_ion_count | abs(maxima$RT-RT_expected)<=RT_cut & abs(maxima$mz-mz_expected)<=delta_mz)
          ###if no maxima are left after filtering still take topN closest peak further
          if(length(selection)==0 & nrow(maxima) > 0)
          {
            ###no maxima would be left see keep up to N peaks
            distances <- base::data.frame(dist_rt=maxima$RT-RT_expected,
                                    dist_mz=maxima$mz-mz_expected,
                                    dist_total_scaled=sqrt(((maxima$RT-RT_expected))^2 + ((maxima$mz-mz_expected)*500)^2))


            ###at maximum the top N closest maxima
            ordering <- order(abs(distances$dist_total_scaled))
            maxima <- maxima[ordering,]
            if(nrow(maxima)>num_peaks_store) maxima <- maxima[1:num_peaks_store,]
          }else if(nrow(maxima) > 0)
          {
            maxima <- maxima[selection,]
          }

          ###next merge peaks which are very close together
          if(nrow(maxima)>1 & close_peak_merging == T)
          {
            close_info <- base::as.data.frame(matrix(ncol=2,nrow=nrow(maxima),0))

            for(cl in 1:nrow(maxima))
            {
              temp <- sqrt((maxima$RT[cl]-maxima$RT)^2+((maxima$mz[cl]-maxima$mz)*500)^2)
              sel <- which(temp == min(temp[-cl]))[1]
              data.table::set(close_info,as.integer(cl),as.integer(1:2),value = list(temp[sel],sel))
            }

            dist_cut <- sqrt((delta_mz*500)^2+(selected_features$RT_length/4)^2) ###merge peaks with d_mz < 0.001 and d_RT < peak_width/2
            if(any(close_info$V1 < dist_cut))
            {
              sel <- which(close_info$V1 < dist_cut)

              maxima_add <- base::as.data.frame(matrix(nrow=length(sel),ncol=4,0))
              colnames(maxima_add) <- colnames(maxima)
              ###find mean RT and mz of maxima which should be merged wheigted by density
              for(cl in 1:length(sel))
              {
                RT_add <- stats::weighted.mean(c(maxima$RT[sel[cl]],maxima$RT[close_info[sel[cl],2]]),
                                        c(maxima$density[sel[cl]],maxima$density[close_info[sel[cl],2]]))
                mz_add <- stats::weighted.mean(c(maxima$mz[sel[cl]],maxima$mz[close_info[sel[cl],2]]),
                                        c(maxima$density[sel[cl]],maxima$density[close_info[sel[cl],2]]))
                density_add <- stats::weighted.mean(c(maxima$density[sel[cl]],maxima$density[close_info[sel[cl],2]]),
                                             c(maxima$density[sel[cl]],maxima$density[close_info[sel[cl],2]]))
                count_add <- stats::weighted.mean(c(maxima$estimated_count[sel[cl]],maxima$estimated_count[close_info[sel[cl],2]]),
                                           c(maxima$density[sel[cl]],maxima$density[close_info[sel[cl],2]]))
                data.table::set(maxima_add,as.integer(cl),as.integer(1:4),value=list(RT_add,mz_add,density_add,count_add))
              }
              maxima_add <- unique(maxima_add)
              ###remove maxima which are merged
              remove <- unique(append(sel,close_info[sel,2]))
              maxima <- maxima[-remove,]
              ###add new merged peaks
              maxima <- rbind(maxima,maxima_add)
            }
          }

          ###now check which density maxima is closest to the expected feature
          if(nrow(maxima)>0)
          {
            distances <- base::data.frame(dist_rt=maxima$RT-RT_expected,
                                    dist_mz=maxima$mz-mz_expected,
                                    dist_total_scaled=sqrt(((maxima$RT-RT_expected))^2 + ((maxima$mz-mz_expected)*500)^2))
            ###at maximum the top N closest maxima
            ordering <- order(abs(distances$dist_total_scaled))
            maxima_select <- maxima[ordering,]#which(abs(distances$dist_rt[ordering]) <= max_delta_RT & abs(distances$dist_mz[ordering]) <= max_delta_mz),]

            if(nrow(maxima_select)>num_peaks_store) maxima_select <- maxima_select[1:num_peaks_store,]

            if(nrow(maxima_select)>0)
            {
              ###label which peak was closest to expected feature RT and mz if known
              maxima_select$known_peak <- 0
              if(!is.na(known_RT))
              {
                ###RT is known
                ###check if any peak was detected close to known RT (max deviation by peak width/2)
                peak_close_rt <- which(abs(maxima_select$RT - RT_expected) <= selected_features$RT_length/1.9 & abs(maxima_select$mz - mz_expected) <= delta_mz/1.9)
                ###and if the closest peak actually also shows a good number of ions
                ###otherwise no peak will be selected here but selection will be done later
                if(length(peak_close_rt)>0 & maxima_select$estimated_count[1] >= 2* peak_min_ion_count)
                {
                  maxima_select$known_peak[peak_close_rt[1]] <- 1
                  ###reorder maxima selected such that known peak maximum is at index 1
                  maxima_select <- maxima_select[c(peak_close_rt[1],c(1:nrow(maxima_select))[-peak_close_rt[1]]),]
                }else ###peak RT is known but none of the detected peaks shows RT close to the known RT
                {
                  ###in this case none of the maxima are set to be the known peak and later we will try to decide which one is the correct one
                }
              }
              ###if any maximum was detected, return table of ions as a list containing up to 3 peak area ion lists from closest to further away peaks.
              RT_window_width <- selected_features$RT_length
              mz_window_width <- delta_mz

              peak_counter=0

              for(i in 1:nrow(maxima_select))
              {
                cur_mz_window <- c(maxima_select$mz[i]-(mz_window_width),
                                   maxima_select$mz[i]+(mz_window_width))
                cur_RT_window <- c(maxima_select$RT[i]-(RT_window_width/2),
                                   maxima_select$RT[i]+(RT_window_width/2))

                selection <- which(ion_data$m.z >= cur_mz_window[1] &
                                     ion_data$m.z <= cur_mz_window[2] &
                                     ion_data$RT >= cur_RT_window[1] &
                                     ion_data$RT <= cur_RT_window[2])

                if(length(selection)<peak_min_ion_count & maxima_select$known_peak[i] == 1)maxima_select$known_peak[i] <- 0 ###if this peak should be the correct peak but number of ions are very low then set to unknown correct peak

                peak_counter <- peak_counter + 1

                peak_ion_data_list[[peak_counter]] <- list(ion_data=ion_data[selection,],
                                                           Peak_info=base::data.frame(RT=maxima_select$RT[i],
                                                                                RT_window_lower=cur_RT_window[1],
                                                                                RT_window_upper=cur_RT_window[2],
                                                                                mz=maxima_select$mz[i],
                                                                                mz_window_lower=cur_mz_window[1],
                                                                                mz_window_upper=cur_mz_window[2],
                                                                                density=maxima_select$density[i],
                                                                                known_peak=maxima_select$known_peak[i],
                                                                                Peak=peak_counter,
                                                                                ion_count=length(selection)))

              }
              if(peak_counter>0)names(peak_ion_data_list) <- as.character(1:peak_counter)
              cur_RT_window <- c(RT_expected-(RT_window_width/2),RT_expected+(RT_window_width/2))
              ###also add quantification if standard windows are used
              selection <- which(ion_data$m.z >= mz_window[1] &
                                   ion_data$m.z <= mz_window[2] &
                                   ion_data$RT >= cur_RT_window[1] &
                                   ion_data$RT <= cur_RT_window[2])
              peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data[selection,],
                                                                            Peak_info=base::data.frame(RT=RT_expected,
                                                                                                 RT_window_lower=cur_RT_window[1],
                                                                                                 RT_window_upper=cur_RT_window[2],
                                                                                                 mz=mz_expected,
                                                                                                 mz_window_lower=mz_window[1],
                                                                                                 mz_window_upper=mz_window[2],
                                                                                                 density=0,
                                                                                                 known_peak=0,
                                                                                                 Peak=num_peaks_store+1,
                                                                                                 ion_count=length(selection)))
              ###plot should be directly generated
              if(plot==T)
              {
                graphics::image(f2,main=base::paste(cur_sample,"-",selected_features$Feature_name),xlab="RT",ylab="m/z",xlim=RT_window_expanded,ylim=mz_window_expanded)
                #graphics::points(maxima_select$RT,maxima_select$mz)
                graphics::text(maxima_select$RT,maxima_select$mz,1:nrow(maxima_select),col="black")
                ###expected window
                graphics::rect(cur_RT_window[1],mz_window[1],cur_RT_window[2],mz_window[2],lty=2)
                graphics::points(RT_expected,mz_expected,pch=4)
                ###adjusted window

                graphics::rect(maxima_select$RT-RT_window_width/2,maxima_select$mz-mz_window_width,maxima_select$RT+RT_window_width/2,maxima_select$mz+mz_window_width,lty=2,border=ifelse(maxima_select$known_peak==1,"green","darkgrey"))

                ###add label indicating intensity within the window
                for(i in 1:length(peak_ion_data_list))
                {
                  sum_intensity <- sum(sum(peak_ion_data_list[[i]]$ion_data$Intensity))
                  if(sum_intensity > 0)sum_intensity <- base::log2(sum_intensity)
                  if(peak_ion_data_list[[i]]$Peak_info$Peak != num_peaks_store+1)
                  {
                    graphics::text(peak_ion_data_list[[i]]$Peak_info$RT,peak_ion_data_list[[i]]$Peak_info$mz-(1.1*delta_mz),round(sum_intensity,2),col=ifelse(peak_ion_data_list[[i]]$Peak_info$known_peak==1,"green","darkgrey"))
                  }else
                  {
                    graphics::text(peak_ion_data_list[[i]]$Peak_info$RT,peak_ion_data_list[[i]]$Peak_info$mz+(1.1*delta_mz),round(sum_intensity,2),col="black")
                  }

                }
              }else ###store all relevant data for performing plotting later
              {
                peak_intensities <- base::as.data.frame(matrix(ncol=1,nrow=length(peak_ion_data_list)))
                peak_intensities[,1] <- as.numeric(peak_intensities[,1])

                for(i in 1:length(peak_ion_data_list))
                {
                  sum_intensity <- sum(sum(peak_ion_data_list[[i]]$ion_data$Intensity))
                  if(sum_intensity > 0)sum_intensity <- base::log2(sum_intensity)
                  data.table::set(peak_intensities,as.integer(i),as.integer(1),sum_intensity)
                }
                graph <- list(kdemap=f2,
                              maxima_select=maxima_select,
                              peak_intensities=peak_intensities)
              }
            }else ###no maxima with a minmal density found
            {
              ###in this case simply extract ions which are within the expected window
              RT_window_width <- selected_features$RT_length
              cur_RT_window <- c(RT_expected-(RT_window_width/2),RT_expected+(RT_window_width/2))

              selection <- which(ion_data$m.z >= mz_window[1] &
                                   ion_data$m.z <= mz_window[2] &
                                   ion_data$RT >= cur_RT_window[1] &
                                   ion_data$RT <= cur_RT_window[2])
              peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data[selection,],
                                                                            Peak_info=base::data.frame(RT=RT_expected,
                                                                                                 RT_window_lower=cur_RT_window[1],
                                                                                                 RT_window_upper=cur_RT_window[2],
                                                                                                 mz=mz_expected,
                                                                                                 mz_window_lower=mz_window[1],
                                                                                                 mz_window_upper=mz_window[2],
                                                                                                 density=0,
                                                                                                 known_peak=0,
                                                                                                 Peak=4,
                                                                                                 ion_count=length(selection)))
            }

          }else ###no maxima found
          {
            ###in this case simply extract ions which are within the expected window
            RT_window_width <- selected_features$RT_length
            cur_RT_window <- c(RT_expected-(RT_window_width/2),RT_expected+(RT_window_width/2))

            selection <- which(ion_data$m.z >= mz_window[1] &
                                 ion_data$m.z <= mz_window[2] &
                                 ion_data$RT >= cur_RT_window[1] &
                                 ion_data$RT <= cur_RT_window[2])
            peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data[selection,],
                                                                          Peak_info=base::data.frame(RT=RT_expected,
                                                                                               RT_window_lower=cur_RT_window[1],
                                                                                               RT_window_upper=cur_RT_window[2],
                                                                                               mz=mz_expected,
                                                                                               mz_window_lower=mz_window[1],
                                                                                               mz_window_upper=mz_window[2],
                                                                                               density=0,
                                                                                               known_peak=0,
                                                                                               Peak=4,
                                                                                               ion_count=length(selection)))
          }
        }else
        {
          ###not enough ions even in expanded window
          ###in this case simply extract ions which are within the expected window
          RT_window_width <- selected_features$RT_length
          cur_RT_window <- c(RT_expected-(RT_window_width/2),RT_expected+(RT_window_width/2))

          selection <- which(ion_data$m.z >= mz_window[1] &
                               ion_data$m.z <= mz_window[2] &
                               ion_data$RT >= cur_RT_window[1] &
                               ion_data$RT <= cur_RT_window[2])
          peak_ion_data_list[[as.character(num_peaks_store+1)]] <- list(ion_data=ion_data[selection,],
                                                                        Peak_info=base::data.frame(RT=RT_expected,
                                                                                             RT_window_lower=cur_RT_window[1],
                                                                                             RT_window_upper=cur_RT_window[2],
                                                                                             mz=mz_expected,
                                                                                             mz_window_lower=mz_window[1],
                                                                                             mz_window_upper=mz_window[2],
                                                                                             density=0,
                                                                                             known_peak=0,
                                                                                             Peak=4,
                                                                                             ion_count=length(selection)))
        }

        names(peak_ion_data_list)[which(names(peak_ion_data_list) != as.character(num_peaks_store+1))] <- base::paste("Peak_",names(peak_ion_data_list)[which(names(peak_ion_data_list) != as.character(num_peaks_store+1))],sep="")
        names(peak_ion_data_list)[which(names(peak_ion_data_list) == as.character(num_peaks_store+1))] <- "Standard"
        peak_ion_data_list$graph <- graph
        return(peak_ion_data_list)
      }

      `%not in%` <- function (x, table) is.na(match(x, table, nomatch=NA_integer_))

      t.test2 <- function(...) ###T-test
      {
        obj<-try(stats::t.test(...), silent=TRUE)
        if (methods::is(obj, "try-error")) return(NA) else return(obj$p.value)
      }

      setwd(path)


      max <- 1
      print("Load spectra data")
      pb <- tcltk::tkProgressBar(title = base::paste("Load spectra data:",Sample_ID),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      table_store <- NULL
      if(grepl("_Channel_light|_Channel_medium|_Channel_heavy",Sample_ID))#SILAC mode --- all channels in one raw file
      {
        load(base::paste(base::gsub("_Channel_light|_Channel_medium|_Channel_heavy","",Sample_ID),"_all_ions.RData",sep="")) ###ions with RT and intensity in variable dat
      }else
      {
        load(base::paste(Sample_ID,"_all_ions.RData",sep="")) ###ions with RT and intensity in variable dat
      }

      tcltk::setTkProgressBar(pb, 1, label=base::paste( round(1/max*100, 0)," % done (",1,"/",max,")",sep = ""))
      close(pb)

      if(MassSpec_mode == "TIMSToF")
      {
        dat <- table_store
        rm(table_store)
        gc()

        dat$RT <- dat$RT/60
        dat <- dat[,c(3,1,4,2)]
        colnames(dat) <- c("m.z","RT","Intensity","1/K0")
      }

      ###Indexing dat by RT windows to improve speed for subsetting
      num_windows <- ceiling(ceiling(max(dat$RT))*(1/indexing_RT_window))

      ###if in TIMSToF mode, further index by IM to further improve speed
      if(MassSpec_mode == "TIMSToF")
      {
        indexing_IM_window <- 0.025
        num_IM_windows <- ceiling(ceiling(max(dat$`1/K0`)-min(dat$`1/K0`))*(1/indexing_IM_window))
        num_windows_RT_only <- num_windows
        min_im <- min(dat$`1/K0`)
        num_windows <- num_windows * num_IM_windows
      }

      if(MassSpec_mode == "Orbitrap")
      {
        Indices <- base::as.data.frame(matrix(ncol=4,nrow=num_windows))
        colnames(Indices) <- c("RT_start","RT_end","Row_start","Row_end")
        Indices$RT_start <- as.numeric(Indices$RT_start)
        Indices$RT_end <- as.numeric(Indices$RT_end)
        Indices$Row_start <- as.numeric(Indices$Row_start)
        Indices$Row_end <- as.numeric(Indices$Row_end)

        print("Indexing intensities")
        max <- nrow(Indices)
        pb <- tcltk::tkProgressBar(title = base::paste("Indexing intensities:",Sample_ID),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0
        start <- 1
        end <- max

        cur_index <- 1

        ###increment per indexing
        incr <- 10000

        for(i in 1:nrow(Indices))#
        {
          start <- (i-1)*indexing_RT_window
          end <- i*indexing_RT_window

          indx <- cur_index
          indx_prev <- cur_index
          while(T)
          {
            indx <- indx + incr

            if(dat$RT[indx] >= end | indx > nrow(dat))
            {
              break
            }else
            {
              indx_prev <- indx
            }
          }
          if(indx > nrow(dat))indx <- nrow(dat)

          #inds <- which(dat$RT > start & dat$RT < end)
          if(indx > indx_prev)
          {
            temp <- dat[indx_prev:indx,]

            if(length(which(temp$RT < end)) > 0)
            {
              inds <- cur_index:(indx_prev-1+max(which(temp$RT < end)))

              incr <- max(inds) + 1 - cur_index

              cur_index <- max(inds) + 1
              if(length(inds)>0)
              {
                data.table::set(Indices,i = as.integer(i),j=as.integer(1:4),value=as.list(c(start,end,min(inds),max(inds))))
              }
            }
          }


          updatecounter <- updatecounter + 1
          if(updatecounter >= 1)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }

        }
        close(pb)

        if(any(rowSums(is.na(Indices)) > 0))
        {
          Indices <- Indices[-which(rowSums(is.na(Indices)) > 0),]
        }

        if(min(features_select$RT_range_min) < Indices$RT_start[1])Indices$RT_start[1] <- min(features_select$RT_range_min)
        if(max(features_select$RT_range_max) > Indices$RT_end[nrow(Indices)])Indices$RT_end[nrow(Indices)] <- max(features_select$RT_range_max)

        ###Fragment data into indexed RT windows to further improve subsetting speed
        data_frags <- list()

        for(i in 1:nrow(Indices))
        {
          data_frags[[i]] <- dat[Indices$Row_start[i]:Indices$Row_end[i],]
        }

        rm(dat)
        crap <- gc(F)

      }else
      {
        Indices <- base::as.data.frame(matrix(ncol=4,nrow=num_windows))
        colnames(Indices) <- c("RT_start","RT_end","IM_start","IM_end")
        Indices$RT_start <- sort(rep(((0:(num_windows_RT_only-1))*indexing_RT_window),num_IM_windows))
        Indices$RT_end <- sort(rep(((1:num_windows_RT_only)*indexing_RT_window),num_IM_windows))
        Indices$IM_start <- min_im + ((0:(num_IM_windows-1))*indexing_IM_window)
        Indices$IM_end <- min_im + ((1:num_IM_windows)*indexing_IM_window)

        Indices_list <- list()
        Indices_list_count <- 0

        print("Indexing intensities")
        max <- num_windows_RT_only
        pb <- tcltk::tkProgressBar(title = base::paste("Indexing intensities:",Sample_ID),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0
        start <- 1
        end <- max

        cur_index <- 1

        ###increment per indexing
        incr <- 10000

        for(i in 1:num_windows_RT_only)
        {
          start <- (i-1)*indexing_RT_window
          end <- i*indexing_RT_window

          indx <- cur_index
          indx_prev <- cur_index
          while(T)
          {
            indx <- indx + incr

            if(dat$RT[indx] >= end | indx > nrow(dat))
            {
              break
            }else
            {
              indx_prev <- indx
            }
          }
          if(indx > nrow(dat))indx <- nrow(dat)

          if(indx > indx_prev)
          {
            temp <- dat[indx_prev:indx,]

            if(length(which(temp$RT < end)) > 0)
            {
              inds <- cur_index:(indx_prev-1+max(which(temp$RT < end)))
              incr <- max(inds) + 1 - cur_index
              temp <- dat[inds,]

              ##now further subset based on IM
              for(j in 1:num_IM_windows)
              {
                start_im <- min_im + ((j-1)*indexing_IM_window)
                end_im <- min_im + (j*indexing_IM_window)

                indx_temp <- inds[which(temp$`1/K0` >= start_im & temp$`1/K0` <= end_im)]
                Indices_list_count <- Indices_list_count + 1
                if(length(indx_temp)>0)
                {
                  Indices_list[[Indices_list_count]] <- indx_temp
                }else
                {
                  Indices_list[[Indices_list_count]] <- NA
                }
              }

              cur_index <- max(inds) + 1
            }
          }

          if(Indices_list_count != (i*num_IM_windows))
          {
            missing <- (i*num_IM_windows) - Indices_list_count

            for(j in 1:missing)
            {
              Indices_list_count <- Indices_list_count + 1
              Indices_list[[Indices_list_count]] <- NA
            }
          }

          updatecounter <- updatecounter + 1
          if(updatecounter >= 1)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }

        }
        close(pb)

        if(any(is.na(Indices_list)))
        {
          remove <- which(is.na(Indices_list))
          Indices <- Indices[-remove,]
          na.omit.list <- function(y) { return(y[!sapply(y, function(x) all(is.na(x)))]) }
          Indices_list <- na.omit.list(Indices_list)
        }

        ###Fragment data into indexed RT windows to further improve subsetting speed
        data_frags <- list()

        print("Fragment data")
        max <- nrow(Indices)
        pb <- tcltk::tkProgressBar(title = base::paste("Fragment data:",Sample_ID),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
        start_time <- Sys.time()
        updatecounter <- 0
        time_require <- 0
        for(i in 1:nrow(Indices))
        {
          data_frags[[i]] <- dat[Indices_list[[i]],]
          updatecounter <- updatecounter + 1
          if(updatecounter >= 100)
          {
            time_elapsed <- difftime(Sys.time(),start_time,units="secs")
            time_require <- (time_elapsed/(i/max))*(1-(i/max))
            td <- lubridate::seconds_to_period(time_require)
            time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

            updatecounter <- 0
            tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
          }

        }
        close(pb)
      }

      rm(dat)
      crap <- gc(F)


      ###Now extract intensities,
      ###row1 = Intensity summed,
      ###row2 = num ions,
      ###row3 = mean intensity,
      ###row4 = sd intensity,
      ###row5 = detected optimum mz window minimum
      ###row6 = detected optimum mz window maximum
      ###row7 = detected optimum RT window minimum
      ###row8 = detected optimum RT window maximum
      ###row9 = number of detected peaks

      if(peak_detection == F)
      {
        graph_peaks <- list()
        peaks_quant <- list()

        #if(MassSpec_mode == "Orbitrap")
        {
          Intensities <- base::as.data.frame(matrix(nrow=10,ncol=nrow(features_select),0))
          colnames(Intensities) <- features_select$Feature_name

          ###if cut off pvalue is defined then store background quantifications separately
          if(ion_intensity_cutoff == T)
          {
            Intensities_signal_background <- base::as.data.frame(matrix(nrow=10,ncol=nrow(features_select),0))
            colnames(Intensities_signal_background ) <- features_select$Feature_name
          }
        }
        # else
        # {
        #   Intensities <- base::as.data.frame(matrix(nrow=12,ncol=nrow(features_select),0))
        #   colnames(Intensities) <- features_select$Feature_name
        #
        #   ###if cut off pvalue is defined then store background quantifications separately
        #   if(ion_intensity_cutoff == T)
        #   {
        #     Intensities_signal_background <- base::as.data.frame(matrix(nrow=12,ncol=nrow(features_select),0))
        #     colnames(Intensities_signal_background ) <- features_select$Feature_name
        #   }
        # }

        ###store data for calculating scoring per sample and feature
        delta_T1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        colnames(delta_T1) <- features_select$Feature_name
        delta_T2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        colnames(delta_T2) <- features_select$Feature_name
        delta_M1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        colnames(delta_M1) <- features_select$Feature_name
        delta_M2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        colnames(delta_M2) <- features_select$Feature_name
      }else
      {
        graph_peaks <- list()
        peaks_quant <- list()
        #if(MassSpec_mode == "Orbitrap")
        {
          for(p in 1:(num_peaks_store+1)) ###1-num_peaks_store stores the quantification for top closest peaks while last stores total quantification of the window
          {
            Intensities <- base::as.data.frame(matrix(nrow=10,ncol=nrow(features_select),0))
            colnames(Intensities) <- features_select$Feature_name

            ###if cut off pvalue is defined then store background quantifications separately
            if(ion_intensity_cutoff == T)
            {
              Intensities_signal_background <- base::as.data.frame(matrix(nrow=10,ncol=nrow(features_select),0))
              colnames(Intensities_signal_background ) <- features_select$Feature_name
            }
            ###store data for calculating scoring per sample and feature
            delta_T1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
            colnames(delta_T1) <- features_select$Feature_name
            delta_T2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
            colnames(delta_T2) <- features_select$Feature_name
            delta_M1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
            colnames(delta_M1) <- features_select$Feature_name
            delta_M2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
            colnames(delta_M2) <- features_select$Feature_name

            peaks_quant[[p]] <- list(Intensities=Intensities,
                                     Intensities_signal_background=Intensities_signal_background,
                                     delta_T1=delta_T1,
                                     delta_T2=delta_T2,
                                     delta_M1=delta_M1,
                                     delta_M2=delta_M2)
          }
          names(peaks_quant)[1:num_peaks_store] <- base::paste("Peak_",1:num_peaks_store,sep="")
          names(peaks_quant)[num_peaks_store+1] <- "Standard"
        }
        # if(MassSpec_mode == "TIMSToF")
        # {
        #
        #   for(p in 1:(num_peaks_store+1)) ###1-num_peaks_store stores the quantification for top closest peaks while last stores total quantification of the window
        #   {
        #     Intensities <- base::as.data.frame(matrix(nrow=12,ncol=nrow(features_select),0))
        #     colnames(Intensities) <- features_select$Feature_name
        #
        #     ###if cut off pvalue is defined then store background quantifications separately
        #     if(ion_intensity_cutoff == T)
        #     {
        #       Intensities_signal_background <- base::as.data.frame(matrix(nrow=12,ncol=nrow(features_select),0))
        #       colnames(Intensities_signal_background ) <- features_select$Feature_name
        #     }
        #     ###store data for calculating scoring per sample and feature
        #     delta_T1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        #     colnames(delta_T1) <- features_select$Feature_name
        #     delta_T2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        #     colnames(delta_T2) <- features_select$Feature_name
        #     delta_M1 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        #     colnames(delta_M1) <- features_select$Feature_name
        #     delta_M2 <- base::as.data.frame(matrix(nrow=1,ncol=nrow(features_select),0))
        #     colnames(delta_M2) <- features_select$Feature_name
        #
        #     peaks_quant[[p]] <- list(Intensities=Intensities,
        #                              Intensities_signal_background=Intensities_signal_background,
        #                              delta_T1=delta_T1,
        #                              delta_T2=delta_T2,
        #                              delta_M1=delta_M1,
        #                              delta_M2=delta_M2)
        #   }
        #   names(peaks_quant)[1:num_peaks_store] <- base::paste("Peak_",1:num_peaks_store,sep="")
        #   names(peaks_quant)[num_peaks_store+1] <- "Standard"
        # }
      }

      print("Extracting intensities")
      max <- nrow(features_select)
      pb <- tcltk::tkProgressBar(title = base::paste("Extracting intensities:",Sample_ID),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      start_time <- Sys.time()
      updatecounter <- 0
      time_require <- 0
      end <- max

      Sample_ID_save <- Sample_ID
      Sample_ID <- base::gsub("-",".",Sample_ID)

      iso_mult <- (1:3)*1.002054

      peak_width_stats <- grDevices::boxplot.stats(features_select$RT_length)$stats
      delta_mz <- stats::median(features_select$m.z - features_select$m.z_range_min,na.rm=T)
      delta_rt <- stats::median(features_select$RT - features_select$RT_range_min,na.rm=T)/2

      known_RTs <- base::as.data.frame(stringr::str_split(features_select$Observed_RT,";",simplify = T))
      colnames(known_RTs) <- base::substr(colnames(features_select)[which(grepl("RT_calibration",colnames(features_select)))],16,1000)
      known_RTs[] <- lapply(known_RTs, function(x) as.numeric(as.character(x)))
      known_RTs_all <- known_RTs
      known_RTs <- known_RTs[,Sample_ID]

      RT_window_expand_factor <- 5 ###expand RT window around expected window for 2Dpeak detection

      if(MassSpec_mode == "TIMSToF")
      {
        IM_width_stats <- grDevices::boxplot.stats(features_select$Inv_K0_length)$stats
        delta_im <- stats::median(features_select$Inv_K0 - features_select$Inv_K0_range_min,na.rm=T)/2

        known_IMs <- base::as.data.frame(stringr::str_split(features_select$Observed_IM,";",simplify = T))
        colnames(known_IMs) <- base::substr(colnames(features_select)[which(grepl("IM_calibration",colnames(features_select)))],16,1000)
        known_IMs[] <- lapply(known_IMs, function(x) as.numeric(as.character(x)))
        known_IMs_all <- known_IMs
        known_IMs <- known_IMs[,Sample_ID]

        IM_window_expand_factor <- 5
        delta_im <- stats::median(features_select$Inv_K0 - features_select$Inv_K0_range_min,na.rm=T)/2

      }

      if(plots == T)
      {
        dir.create(base::paste(path,"/2Dpeakselection",sep=""))
        dir.create(base::paste(path,"/2Dpeakselection/",Sample_ID,sep=""))
      }

      for(i in 1:nrow(features_select))
      {
        if(MassSpec_mode == "Orbitrap")
        {
          ###extract all relevant ions in RT window
          RT_correction <- ifelse(RT_calibration == T,features_select[i,base::paste("RT_calibration.",Sample_ID,sep="")],0)

          if(peak_detection == T)# & !grepl("_d",features_select$Feature_name[i]))
          {
            RT_window <- c(features_select$RT_range_min[i] + RT_correction,
                           features_select$RT_range_max[i] + RT_correction)
            RT_window <- c(RT_window[1]-(delta_rt*RT_window_expand_factor),
                           RT_window[2]+(delta_rt*RT_window_expand_factor))
          }else
          {
            RT_window <- c(features_select$RT[i]+RT_correction-(features_select$RT_length[i]),features_select$RT[i]+RT_correction+(features_select$RT_length[i]))
          }

          ###get search range in spectra
          if(RT_window[1] <= max(Indices$RT_start) & RT_window[2] <= max(Indices$RT_end))
          {
            search_range <- (which(Indices$RT_start>=RT_window[1])[1]-1):(which(Indices$RT_end>=RT_window[2])[1])
          }else
          {
            search_range <- nrow(Indices)
          }
          if(any(search_range < 1))
          {
            search_range <- search_range[-which(search_range < 1)]
          }
        }else
        {
          ###extract all relevant ions in RT window
          RT_correction <- ifelse(RT_calibration == T,features_select[i,base::paste("RT_calibration.",Sample_ID,sep="")],0)
          IM_correction <- features_select[i,base::paste("IM_calibration.",Sample_ID,sep="")]

          if(peak_detection == T)# & !grepl("_d",features_select$Feature_name[i]))
          {
            RT_window <- c(features_select$RT_range_min[i] + RT_correction,
                           features_select$RT_range_max[i] + RT_correction)
            RT_window <- c(RT_window[1]-(delta_rt*RT_window_expand_factor),
                           RT_window[2]+(delta_rt*RT_window_expand_factor))

            IM_window <- c(features_select$Inv_K0_range_min[i] + IM_correction,
                           features_select$Inv_K0_range_max[i] + IM_correction)
            IM_window <- c(IM_window[1]-(delta_im*IM_window_expand_factor),
                           IM_window[2]+(delta_im*IM_window_expand_factor))
          }else
          {
            RT_window <- c(features_select$RT[i]+RT_correction-(features_select$RT_length[i]),features_select$RT[i]+RT_correction+(features_select$RT_length[i]))
            IM_window <- c(features_select$Inv_K0[i]+IM_correction-(features_select$Inv_K0_length[i]),features_select$Inv_K0[i]+IM_correction+(features_select$Inv_K0_length[i]))
          }

          ###get search range in spectra
          if(RT_window[1] <= max(Indices$RT_start) & RT_window[2] <= max(Indices$RT_end))
          {
            search_range <- which(Indices$RT_start <= RT_window[1] & Indices$RT_end >= RT_window[1] | Indices$RT_start >= RT_window[1] & Indices$RT_end <= RT_window[2] | Indices$RT_start >= RT_window[1] & Indices$RT_start <= RT_window[2])
            search_range_IM <- which(Indices$IM_start <= IM_window[1] & Indices$IM_end >= IM_window[1] | Indices$IM_start >= IM_window[1] & Indices$IM_end <= IM_window[2] | Indices$IM_start >= IM_window[1] & Indices$IM_start <= IM_window[2])
            search_range <- search_range[which(search_range %in% search_range_IM)]

          }else
          {
            search_range <- nrow(Indices)
          }
          if(any(search_range < 1))
          {
            search_range <- search_range[-which(search_range < 1)]
          }
        }

        if(length(search_range)>0)
        {
          sub <- data.table::rbindlist(data_frags[search_range])

          res <- NULL
          if(peak_detection == T)#& !grepl("_d",features_select$Feature_name[i])) ###use 2D peak detection
          {
            if(plots == T)
            {
              grDevices::pdf(base::paste(path,"/2Dpeakselection/",Sample_ID,"/",features_select$Feature_name[i],".pdf",sep=""))
            }

            if(MassSpec_mode == "Orbitrap")
            {
              res <- feature_2D_peak_selection(mz_bw=0.002,all_ion_data = sub,selected_features = features_select[i,],cur_sample = Sample_ID,known_RT = known_RTs[i],delta_mz = delta_mz,delta_rt=delta_rt,max_delta_RT = 2*delta_rt,max_delta_mz = 3*delta_mz,peak_min_ion_count = peak_min_ion_count,n_raster_dens_matrix = kde_resolution,num_peaks_store=num_peaks_store,plot = plots,MassSpec_mode = MassSpec_mode)
            }else
            {
              res <- feature_2D_peak_selection(mz_bw=0.01,all_ion_data = sub,selected_features = features_select[i,],cur_sample = Sample_ID,known_RT = known_RTs[i],delta_mz = delta_mz,delta_rt=delta_rt,max_delta_RT = 2*delta_rt,max_delta_mz = 3*delta_mz,peak_min_ion_count = peak_min_ion_count,n_raster_dens_matrix = kde_resolution,num_peaks_store=num_peaks_store,plot = plots,MassSpec_mode = MassSpec_mode,delta_im = delta_im,IM_window_expand_factor=IM_window_expand_factor,use_IM_data=use_IM_data)
            }

            if(plots == T)
            {
              grDevices::dev.off()
            }
            if(plots==F) ###store graphs in list
            {
              #graph_peaks[[as.character(features_select$Feature_name[i])]] <- res$graph
            }
            res$graph <- NULL
          }else ###no peak detection
          {
            res <- feature_no_peak_selection(all_ion_data = sub,selected_features = features_select[i,],cur_sample = Sample_ID,num_peaks_store=num_peaks_store,MassSpec_mode = MassSpec_mode,use_IM_data=use_IM_data)
          }

          ###check if any ions are available in any window
          temp <- unlist(res)

          if(any(temp[grepl("ion_count",names(temp))]>0))
          {
            if(peak_detection == F) ##no peak detection - simply sum all ion intensities within expected window
            {
              ####all ions
              m.z_window_final <- c(res$Standard$Peak_info$mz_window_lower,res$Standard$Peak_info$mz_window_upper)
              RT_window_final <- c(res$Standard$Peak_info$RT_window_lower,res$Standard$Peak_info$RT_window_upper)
              res_int_total <- res$Standard$ion_data$Intensity
              res_mz_total <- res$Standard$ion_data$m.z
              res_rt_total <- res$Standard$ion_data$RT
              res_isotope_total <- res$Standard$ion_data$isotope
              length_data_total <- length(res_int_total)
              if(MassSpec_mode == "TIMSToF")
              {
                IM_window_final <- c(res$Standard$Peak_info$IM_window_lower,res$Standard$Peak_info$IM_window_upper)
                res_im_total <- res$Standard$ion_data$`1/K0`
              }

              ###determine which ions are showing an intensity above the background signal intensity
              if(ion_intensity_cutoff == T)
              {
                res$Standard$ion_data$signal_background <- ifelse(base::log2(res$Standard$ion_data$Intensity) > mean_background_ion_intensity_model[i,Sample_ID]+2*sd_background_ion_intensity[1,Sample_ID],1,0)

                res_int_signal <- res_int_total[which(res$Standard$ion_data$signal_background == 1)]
                res_mz_signal <- res_mz_total[which(res$Standard$ion_data$signal_background == 1)]
                res_rt_signal <- res_rt_total[which(res$Standard$ion_data$signal_background == 1)]
                res_isotope_signal <- res_isotope_total[which(res$Standard$ion_data$signal_background == 1)]
                length_data_signal <- length(res_int_signal)

                ###save signal Intensities
                if(length(res_int_signal)>0)
                {
                  #if(MassSpec_mode == "Orbitrap")
                  {
                    data.table::set(Intensities_signal_background,i=as.integer(1:8),j=as.integer(i),value=list(c(log10(sum(res_int_signal)),
                                                                                                     length(res_int_signal),
                                                                                                     mean(log10(res_int_signal),na.rm=T),
                                                                                                     stats::sd(log10(res_int_signal),na.rm=T),
                                                                                                     m.z_window_final[1],
                                                                                                     m.z_window_final[2],
                                                                                                     RT_window_final[1],
                                                                                                     RT_window_final[2])))
                  }
                  # else
                  # {
                  #   data.table::set(Intensities_signal_background,i=as.integer(c(1:8,11:12)),j=as.integer(i),value=list(c(log10(sum(res_int_signal)),
                  #                                                                                             length(res_int_signal),
                  #                                                                                             mean(log10(res_int_signal),na.rm=T),
                  #                                                                                             stats::sd(log10(res_int_signal),na.rm=T),
                  #                                                                                             m.z_window_final[1],
                  #                                                                                             m.z_window_final[2],
                  #                                                                                             RT_window_final[1],
                  #                                                                                             RT_window_final[2],
                  #                                                                                             IM_window_final[1],
                  #                                                                                             IM_window_final[2])))
                  # }
                }

                ###save signal+background Intensities
                if(length(res_int_total)>0)
                {
                  #if(MassSpec_mode == "Orbitrap")
                  {
                    data.table::set(Intensities_signal_background,i=as.integer(1:8),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                                                                                                     length(res_int_total),
                                                                                                     mean(log10(res_int_total),na.rm=T),
                                                                                                     stats::sd(log10(res_int_total),na.rm=T),
                                                                                                     m.z_window_final[1],
                                                                                                     m.z_window_final[2],
                                                                                                     RT_window_final[1],
                                                                                                     RT_window_final[2])))
                  }
                  # else
                  # {
                  #   data.table::set(Intensities_signal_background,i=as.integer(c(1:8,11:12)),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                  #                                                                                    length(res_int_total),
                  #                                                                                    mean(log10(res_int_total),na.rm=T),
                  #                                                                                    stats::sd(log10(res_int_total),na.rm=T),
                  #                                                                                    m.z_window_final[1],
                  #                                                                                    m.z_window_final[2],
                  #                                                                                    RT_window_final[1],
                  #                                                                                    RT_window_final[2],
                  #                                                                                    IM_window_final[1],
                  #                                                                                    IM_window_final[2])))
                  # }
                }
              }else ###no discrimination between signal and background ions ## save all ions
              {
                ###save signal+background Intensities
                if(length(res_int_total)>0)
                {
                  #if(MassSpec_mode == "Orbitrap")
                  {
                    data.table::set(Intensities,i=as.integer(1:8),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                                                                                   length(res_int_total),
                                                                                   mean(log10(res_int_total),na.rm=T),
                                                                                   stats::sd(log10(res_int_total),na.rm=T),
                                                                                   m.z_window_final[1],
                                                                                   m.z_window_final[2],
                                                                                   RT_window_final[1],
                                                                                   RT_window_final[2])))
                  }
                  # else
                  # {
                  #   data.table::set(Intensities,i=as.integer(c(1:8,11:12)),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                  #                                                                  length(res_int_total),
                  #                                                                  mean(log10(res_int_total),na.rm=T),
                  #                                                                  stats::sd(log10(res_int_total),na.rm=T),
                  #                                                                  m.z_window_final[1],
                  #                                                                  m.z_window_final[2],
                  #                                                                  RT_window_final[1],
                  #                                                                  RT_window_final[2],
                  #                                                                  IM_window_final[1],
                  #                                                                  IM_window_final[2])))
                  # }
                }
              }

              ###feature alignment Scoreing based on algorithm from DeMix-Q (Zhang, 2016) - use all ions
              if(length_data_total > 0)
              {
                df <- base::data.frame(mz=res_mz_total,rt=res_rt_total,iso=res_isotope_total,int=res_int_total)
                df_mono_isotope <- df[which(df$iso==0),]
                df_isotope_1 <- df[which(df$iso==1),]
                if(nrow(df_mono_isotope)>0)
                {
                  ###deviation from consensus feature
                  max_mono <- which(df_mono_isotope$int == max(df_mono_isotope$int,na.rm=T))
                  data.table::set(delta_T1,i=1L,j=as.integer(i),mean(df_mono_isotope$rt[max_mono],na.rm=T) - (features_select$RT[i]))
                  data.table::set(delta_M1,i=1L,j=as.integer(i),mean(df_mono_isotope$mz[max_mono],na.rm=T) - (features_select$m.z[i]))
                  if(nrow(df_isotope_1)>0)
                  {
                    ###deviation from monoisotopic ion to M+1 isotope ion
                    max_iso_1 <- which(df_isotope_1$int == max(df_isotope_1$int,na.rm=T))
                    data.table::set(delta_T2,i=1L,j=as.integer(i),mean(df_mono_isotope$rt[max_mono],na.rm=T) - mean(df_isotope_1$rt[max_iso_1],na.rm=T))
                    data.table::set(delta_M2,i=1L,j=as.integer(i),(mean(df_mono_isotope$mz[max_mono],na.rm=T)+(iso_mult[1]/features_select$Charge[i])) - mean(df_isotope_1$mz[max_iso_1],na.rm=T))
                  }
                }
              }
            }else ###with peak detection
            {
              ###store up to num_peaks_store potential peak windows + the original expected window
              num_peaks <- length(which(names(res) != "Standard" & names(res) != "graph"))
              for(p in names(res)[which(names(res) != "graph")])
              {
                m.z_window_final <- c(res[[p]]$Peak_info$mz_window_lower,res[[p]]$Peak_info$mz_window_upper)
                RT_window_final <- c(res[[p]]$Peak_info$RT_window_lower,res[[p]]$Peak_info$RT_window_upper)

                res_int_total <- res[[p]]$ion_data$Intensity
                res_mz_total <- res[[p]]$ion_data$m.z
                res_rt_total <- res[[p]]$ion_data$RT
                res_isotope_total <- res[[p]]$ion_data$isotope
                length_data_total <- length(res_int_total)
                if(MassSpec_mode == "TIMSToF")
                {
                  IM_window_final <- c(res$Standard$Peak_info$IM_window_lower,res$Standard$Peak_info$IM_window_upper)
                  res_im_total <- res$Standard$ion_data$`1/K0`
                }

                ###determine which ions are showing an intensity above the background signal intensity
                if(ion_intensity_cutoff == T)
                {
                  res[[p]]$ion_data$signal_background <- ifelse(base::log2(res[[p]]$ion_data$Intensity) > mean_background_ion_intensity_model[i,Sample_ID]+2*sd_background_ion_intensity[1,Sample_ID],1,0)

                  res_int_signal <- res_int_total[which(res[[p]]$ion_data$signal_background == 1)]
                  res_mz_signal <- res_mz_total[which(res[[p]]$ion_data$signal_background == 1)]
                  res_rt_signal <- res_rt_total[which(res[[p]]$ion_data$signal_background == 1)]
                  res_isotope_signal <- res_isotope_total[which(res[[p]]$ion_data$signal_background == 1)]
                  length_data_signal <- length(res_int_signal)

                  ###save signal Intensities
                  if(length(res_int_signal)>0)
                  {
                    data.table::set(peaks_quant[[p]]$Intensities,i=as.integer(1:10),j=as.integer(i),value=list(c(log10(sum(res_int_signal)),
                                                                                                     length(res_int_signal),
                                                                                                     mean(log10(res_int_signal),na.rm=T),
                                                                                                     stats::sd(log10(res_int_signal),na.rm=T),
                                                                                                     m.z_window_final[1],
                                                                                                     m.z_window_final[2],
                                                                                                     RT_window_final[1],
                                                                                                     RT_window_final[2],
                                                                                                     num_peaks,
                                                                                                     res[[p]]$Peak_info$known_peak)))
                  }

                  ###save signal+background Intensities
                  if(length(res_int_total)>0)
                  {
                    data.table::set(peaks_quant[[p]]$Intensities_signal_background,i=as.integer(1:10),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                                                                                                                       length(res_int_total),
                                                                                                                       mean(log10(res_int_total),na.rm=T),
                                                                                                                       stats::sd(log10(res_int_total),na.rm=T),
                                                                                                                       m.z_window_final[1],
                                                                                                                       m.z_window_final[2],
                                                                                                                       RT_window_final[1],
                                                                                                                       RT_window_final[2],
                                                                                                                       num_peaks,
                                                                                                                       res[[p]]$Peak_info$known_peak)))
                  }else ###no intensity at all then at least save standard information
                  {
                    data.table::set(peaks_quant[[p]]$Intensities_signal_background,i=as.integer(1:10),j=as.integer(i),value=list(c(NA,
                                                                                                                       length(res_int_total),
                                                                                                                       NA,
                                                                                                                       NA,
                                                                                                                       m.z_window_final[1],
                                                                                                                       m.z_window_final[2],
                                                                                                                       RT_window_final[1],
                                                                                                                       RT_window_final[2],
                                                                                                                       num_peaks,
                                                                                                                       res[[p]]$Peak_info$known_peak)))
                  }
                }else ###no discrimination between signal and background ions ## save all ions
                {
                  ###save signal+background Intensities
                  if(length(res_int_total)>0)
                  {
                    data.table::set(Intensities,i=as.integer(1:10),j=as.integer(i),value=list(c(log10(sum(res_int_total)),
                                                                                    length(res_int_total),
                                                                                    mean(log10(res_int_total),na.rm=T),
                                                                                    stats::sd(log10(res_int_total),na.rm=T),
                                                                                    m.z_window_final[1],
                                                                                    m.z_window_final[2],
                                                                                    RT_window_final[1],
                                                                                    RT_window_final[2],
                                                                                    num_peaks,
                                                                                    res[[p]]$Peak_info$known_peak)))
                  }else ###no intensity at all then at least save standard information
                  {
                    data.table::set(Intensities,i=as.integer(1:10),j=as.integer(i),value=list(c(NA,
                                                                                    length(res_int_total),
                                                                                    NA,
                                                                                    NA,
                                                                                    m.z_window_final[1],
                                                                                    m.z_window_final[2],
                                                                                    RT_window_final[1],
                                                                                    RT_window_final[2],
                                                                                    num_peaks,
                                                                                    res[[p]]$Peak_info$known_peak)))
                  }
                }

                ###feature alignment Scoreing based on algorithm from DeMix-Q (Zhang, 2016) - use all ions
                if(length_data_total > 0)
                {
                  df <- base::data.frame(mz=res_mz_total,rt=res_rt_total,iso=res_isotope_total,int=res_int_total)
                  df_mono_isotope <- df[which(df$iso==0),]
                  df_isotope_1 <- df[which(df$iso==1),]
                  if(nrow(df_mono_isotope)>0)
                  {
                    ###deviation from consensus feature
                    max_mono <- which(df_mono_isotope$int == max(df_mono_isotope$int,na.rm=T))
                    data.table::set(peaks_quant[[p]]$delta_T1,i=1L,j=as.integer(i),mean(df_mono_isotope$rt[max_mono],na.rm=T) - (features_select$RT[i]))
                    data.table::set(peaks_quant[[p]]$delta_M1,i=1L,j=as.integer(i),mean(df_mono_isotope$mz[max_mono],na.rm=T) - (features_select$m.z[i]))
                    if(nrow(df_isotope_1)>0)
                    {
                      ###deviation from monoisotopic ion to M+1 isotope ion
                      max_iso_1 <- which(df_isotope_1$int == max(df_isotope_1$int,na.rm=T))
                      data.table::set(peaks_quant[[p]]$delta_T2,i=1L,j=as.integer(i),mean(df_mono_isotope$rt[max_mono],na.rm=T) - mean(df_isotope_1$rt[max_iso_1],na.rm=T))
                      data.table::set(peaks_quant[[p]]$delta_M2,i=1L,j=as.integer(i),(mean(df_mono_isotope$mz[max_mono],na.rm=T)+(iso_mult[1]/features_select$Charge[i])) - mean(df_isotope_1$mz[max_iso_1],na.rm=T))
                    }
                  }
                }

              }
            }
          }

        }

        updatecounter <- updatecounter + 1
        if(updatecounter >= 5)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d:%02d',  td@day, td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }

      }
      close(pb)

      if(peak_detection == F)
      {
        Intensities <- t(Intensities)
        Intensities[Intensities == 0] <- NA

        if(ion_intensity_cutoff == T)
        {
          Intensities_signal_background <- t(Intensities_signal_background)
          Intensities_signal_background[Intensities_signal_background == 0] <- NA
        }

        delta_T1 <- t(delta_T1)
        delta_T1[delta_T1 == 0] <- NA
        delta_M1 <- t(delta_M1)
        delta_M1[delta_M1 == 0] <- NA
        delta_T2 <- t(delta_T2)
        delta_T2[delta_T2 == 0] <- NA
        delta_M2 <- t(delta_M2)
        delta_M2[delta_M2 == 0] <- NA

        peaks_quant <- list()
        if(ion_intensity_cutoff == T)
        {
          peaks_quant[[1]] <- list(Intensities=Intensities,
                                   Intensities_signal_background=Intensities_signal_background,
                                   delta_T1=delta_T1,
                                   delta_T2=delta_T2,
                                   delta_M1=delta_M1,
                                   delta_M2=delta_M2)
        }else
        {
          peaks_quant[[1]] <- list(Intensities=Intensities,
                                   delta_T1=delta_T1,
                                   delta_T2=delta_T2,
                                   delta_M1=delta_M1,
                                   delta_M2=delta_M2)
        }

        return(list(peaks_quant=peaks_quant,graph_peaks=graph_peaks))

      }else
      {
        for(p in 1:(num_peaks_store+1))
        {
          peaks_quant[[p]]$Intensities <- t(peaks_quant[[p]]$Intensities)
          peaks_quant[[p]]$Intensities[peaks_quant[[p]]$Intensities == 0] <- NA

          peaks_quant[[p]]$Intensities_signal_background <- t(peaks_quant[[p]]$Intensities_signal_background)
          peaks_quant[[p]]$Intensities_signal_background[peaks_quant[[p]]$Intensities_signal_background == 0] <- NA

          peaks_quant[[p]]$delta_T1 <- t(peaks_quant[[p]]$delta_T1)
          peaks_quant[[p]]$delta_T1[peaks_quant[[p]]$delta_T1 == 0] <- NA
          peaks_quant[[p]]$delta_M1 <- t(peaks_quant[[p]]$delta_M1)
          peaks_quant[[p]]$delta_M1[peaks_quant[[p]]$delta_M1 == 0] <- NA
          peaks_quant[[p]]$delta_T2 <- t(peaks_quant[[p]]$delta_T2)
          peaks_quant[[p]]$delta_T2[peaks_quant[[p]]$delta_T2 == 0] <- NA
          peaks_quant[[p]]$delta_M2 <- t(peaks_quant[[p]]$delta_M2)
          peaks_quant[[p]]$delta_M2[peaks_quant[[p]]$delta_M2 == 0] <- NA
        }

        return(list(peaks_quant=peaks_quant,graph_peaks=graph_peaks))

      }

    }

    ####Function to call get_intensities and finally save resulting table as .tab data table
    extract_intensities <- function(Sample_ID,features_select,path_to_raw,path_to_output_folder,RT_calibration,mz_calibration,peak_detection,ion_intensity_cutoff,mean_background_ion_intensity_model,sd_background_ion_intensity,peak_min_ion_count,kde_resolution,num_peaks_store,plots,MassSpec_mode,use_IM_data)
    {
      #suppressWarnings(suppressMessages(library(data.table,quietly = T)))

      res <- get_intensities(Sample_ID,path = path_to_raw,features_select=features_select,RT_calibration=RT_calibration,mz_calibration=mz_calibration,peak_detection=peak_detection,ion_intensity_cutoff = ion_intensity_cutoff,mean_background_ion_intensity_model=mean_background_ion_intensity_model,sd_background_ion_intensity=sd_background_ion_intensity,peak_min_ion_count=peak_min_ion_count,kde_resolution=kde_resolution,num_peaks_store = num_peaks_store,plots=plots,MassSpec_mode=MassSpec_mode,use_IM_data=use_IM_data)

      peaks_quant <- res$peaks_quant
      #peaks_graph <- res$graph_peaks

      save(peaks_quant,file = base::paste(path_to_output_folder,"/",Sample_ID,"_feature_quant.RData",sep=""))
      #if(length(peaks_graph)>0)save(peaks_graph,file = base::paste(path_to_output_folder,"/",Sample_ID,"_feature_graphs.RData",sep=""))

    }

    ####Prepare threads to run extraction. The task is using much memory so that more than 2 threads in parallel on a pc with 16 gb of ram
    ####results in slower performance than for just 2 threads
    cl <- parallel::makeCluster(n_cores)#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
    doParallel::registerDoParallel(cl)
    res <- foreach::foreach(i=Sample_IDs) %dopar%
      {
        extract_intensities(Sample_ID = i,features_select = features_select,path_to_raw,path_to_output_folder,RT_calibration,mz_calibration,peak_detection=peak_detection,ion_intensity_cutoff = ion_intensity_cutoff,mean_background_ion_intensity_model=mean_background_ion_intensity_model,sd_background_ion_intensity=sd_background_ion_intensity,peak_min_ion_count=peak_min_ion_count,kde_resolution=kde_resolution,num_peaks_store=num_peaks_store,plots=plots,MassSpec_mode=MassSpec_mode,use_IM_data=use_IM_data)
      }
    parallel::stopCluster(cl)

    # for(i in Sample_IDs)
    # {
    #   extract_intensities(Sample_ID = i,features_select = features_select,path_to_raw,path_to_output_folder,RT_calibration,mz_calibration,peak_detection=peak_detection,ion_intensity_cutoff = ion_intensity_cutoff,mean_background_ion_intensity_model=mean_background_ion_intensity_model,sd_background_ion_intensity=sd_background_ion_intensity,peak_min_ion_count=peak_min_ion_count,kde_resolution=kde_resolution,num_peaks_store=num_peaks_store,plots=plots,MassSpec_mode=MassSpec_mode,use_IM_data=use_IM_data)
    # }

  }

  if(MassSpec_mode == "Orbitrap")
  {
    ###check for which samples mzXML files are available
    mzXMLfiles <- list.files(base::paste(path_to_mzXML,"/all_ion_lists",sep=""))
    mzXMLfiles <- mzXMLfiles[which(grepl(".RData",mzXMLfiles))]
    samples <- mzXMLfiles
    samples <- base::substr(samples,1,regexpr("_all_ions.RData",samples)-1)
  }
  if(MassSpec_mode == "TIMSToF")
  {
    ###check for which samples extracted spectra files are available
    mzXMLfiles <- list.files(path_to_extracted_spectra)
    mzXMLfiles <- mzXMLfiles[which(grepl(".RData",mzXMLfiles))]
    samples <- mzXMLfiles
    samples <- base::substr(samples,1,regexpr("_all_ions.RData",samples)-1)
    path_to_mzXML <- base::gsub("\\\\all_ion_lists|/all_ion_lists","",path_to_extracted_spectra)
  }

  ###keep samples which should be actually requantified
  Requant_samples <- base::gsub("RT_calibration\\.","",colnames(features)[which(grepl("RT_calibration\\.",colnames(features)))])
  Requant_samples <- base::gsub("\\.","-",Requant_samples)

  samples <- samples[which(samples %in% Requant_samples)]

  ###Perform quantification of decoy features

  dir.create(base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep=""),showWarnings = F)
  available <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep=""))
  available <- base::gsub("_feature_quant.RData","",available)

  # return a warning if for some samples decoy features were already extracted
  if(length(which(samples %in% available))>0)
  {
    indx <- which(samples %in% available)
    options(warn=1)
    warning(paste0(Sys.time()," Decoy intensities were already extracted for ",paste(samples[indx],collapse=",")," and will be used for subsequent IceR steps. If this is unintended because raw files, MaxQuant results, or IceR parameters were changed, please stop IceR now, delete the folder ",paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep="")," and restart IceR. Consider removing ",paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep="")," as well. If any of the previously mentioned changes were made, please consider removing the complete Temporary_files folder to enable a fresh run of IceR or at least Quantification_raw_results.RData."))
    options(warn=-1)
  }

  if(length(which(samples %not in% available))>0)
  {
    print(paste0(Sys.time()," Extract decoy intensities"))
    selected_decoys <- which(grepl("_d",features$Feature_name))

    samples <- samples[which(samples %not in% available)]
    #single core in case of TIMSToF data as it fills memory too much
    extract_intensities_worker(Sample_IDs = as.character(samples),
                               features_select = features[selected_decoys,],
                               path_to_raw = base::paste(path_to_mzXML,"/all_ion_lists",sep=""),
                               path_to_output_folder = base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep=""),
                               RT_calibration=RT_calibration,
                               mz_calibration=mz_calibration,
                               peak_detection=F,
                               n_cores=ifelse(MassSpec_mode == "Orbitrap",n_cores,ifelse(n_cores >= 3,3,n_cores)),
                               MassSpec_mode = MassSpec_mode,
                               use_IM_data=use_IM_data)
    print(paste0(Sys.time()," Extract decoy intensities finished"))
  }

  ###determine distribution of background ion intensities
  samples <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep=""))
  samples <- samples[which(grepl("_feature_quant.RData",samples))]
  samples <- base::substr(samples,1,regexpr("_feature_quant.RData",samples)-1)

  files <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,sep=""))
  if(length(which(!grepl("_feature_quant.RData",files)))>0)
  {
    files <- files[-which(!grepl("_feature_quant.RData",files))]
  }

  features_select = features[which(grepl("_d",features$Feature_name)),]

  decoy_intensities <- base::as.data.frame(matrix(ncol=length(samples),nrow=nrow(features_select)))
  colnames(decoy_intensities) <- samples
  decoy_intensities <- sapply( decoy_intensities, as.numeric )
  decoy_intensities <- data.table::as.data.table(decoy_intensities)
  rownames(decoy_intensities) <- features_select$Feature_name

  decoy_ioncount <- decoy_intensities
  decoy_mean_intensity <- decoy_intensities
  decoy_sd_intensity <- decoy_intensities

  for(c in 1:ncol(decoy_intensities))
  {
    #load stored data into variable peaks_quant
    peaks_quant <- NULL
    load(base::paste(path_to_mzXML,"/all_ion_lists/Extracted decoy intensities",output_file_names_add,"/",colnames(decoy_intensities)[c],"_feature_quant.RData",sep=""))

    signal=peaks_quant[[1]]$Intensities

    decoy_intensities[,c] <- base::log2(10^signal[,1])
    decoy_ioncount[,c] <- signal[,2]
    decoy_mean_intensity[,c] <- base::log2(10^signal[,3])
    decoy_sd_intensity[,c] <- base::log2(10^signal[,4])
  }

  ###plot general numbers of quantifications of decoy features
  print(paste0(Sys.time()," Fit decoy models"))
  setwd(path_to_features)
  grDevices::pdf("Temporary_files/Decoy feature quantification parameters.pdf")

  ###mean decoy intensity (intensity of a single decoy ion)
  RT_all <- rep(as.numeric(features_select$RT),ncol(decoy_mean_intensity))
  x_all <- as.numeric(as.matrix(decoy_mean_intensity))
  graphics::smoothScatter(RT_all,x_all,ylab="Intensity, log2",main="All samples - Decoy feature mean intensity",xlab="RT [min]")

  ##try to fit an average generalised additive model to determine a RT dependent mean intensity and sd of intensity

  if (length(which(!is.na(unique(x_all)))) > 10) {
    fit_gam_mean <- mgcv::gam(x_all ~ s(RT_all), method = "REML")
  } else {
    warning("Too few decoy ion intensities available. Subsequent statistical evaluations might be wrong.")
    x_all <- rnorm(length(RT_all),mean = 1,sd = 0.1)
    fit_gam_mean <- mgcv::gam(x_all ~ s(RT_all), method = "REML")
  }
  x_pred <- seq(min(features_select$RT,na.rm=T), max(features_select$RT,na.rm=T), length.out = nrow(features_select))
  y_pred <- stats::predict(fit_gam_mean, base::data.frame(RT_all = x_pred))
  graphics::lines(x_pred,y_pred,col="red")
  graphics::legend("topright",legend="GAM",lty=1,col="red")

  ##now fit gam models per sample
  fit_gam_per_sample <- list()
  for(c in 1:ncol(decoy_mean_intensity))
  {
    RT <- as.numeric(features_select$RT)
    x <- as.numeric(as.matrix(decoy_mean_intensity)[,c])
    graphics::smoothScatter(RT,x,ylab="Mean intensity, log2",main=base::paste(colnames(decoy_mean_intensity)[c],"Decoy feature intensity"),xlab="RT [min]")

    ##try to fit an average generalised additive model to determine a RT dependent mean intensity and sd of intensity
    ##if not enough data points available, use the average gam
    gam <- tryCatch({mgcv::gam(x ~ s(RT), method = "REML")}, error = function(error_condition) {
      RT <- rep(as.numeric(features_select$RT),ncol(decoy_mean_intensity))
      x <- as.numeric(as.matrix(decoy_mean_intensity))
      mgcv::gam(x ~ s(RT), method = "REML")
    })
    x_pred <- seq(min(features_select$RT,na.rm=T), max(features_select$RT,na.rm=T), length.out = nrow(features_select))
    y_pred <- stats::predict(gam, base::data.frame(RT = x_pred))
    graphics::lines(x_pred,y_pred,col="red")
    graphics::legend("topright",legend="GAM",lty=1,col="red")
    fit_gam_per_sample[[colnames(decoy_mean_intensity)[c]]] <- gam
  }

  graphics::par(mfrow=c(2,2))
  graphics::boxplot(as.numeric(as.matrix(decoy_intensities)),outline=F,ylab="Summed intensity, log2",main="Summed intensity of decoy ions")
  graphics::boxplot(as.numeric(as.matrix(decoy_mean_intensity)),outline=F,ylab="Mean intensity, log2",main="Mean intensity of decoy ions")
  graphics::boxplot(as.numeric(as.matrix(decoy_sd_intensity)),outline=F,ylab="SD of intensity, log2",main="SD of intensity of decoy ions")
  graphics::boxplot(as.numeric(as.matrix(decoy_ioncount)),outline=F,ylab="Count",main="Number of ions in decoys with quantification")
  graphics::par(mfrow=c(1,1))
  grDevices::dev.off()

  ###Now predict background intensities per sample and feature by using individually fitted GAMs, multiply with median decoy ion count and add some noise using observed decoy intensity sds per sample
  background_intensity_GAM_table_per_feature <- NULL ###used for determining which ions are background ions and which are signal ions
  background_intensity_GAM_table_per_feature_sum <- NULL ###used to later impute missing values

  median_decoy_sd_per_sample <- base::as.data.frame(t(matrixStats::colMedians(as.matrix(decoy_sd_intensity),na.rm=T)))
  colnames(median_decoy_sd_per_sample) <- colnames(decoy_sd_intensity)

  median_decoy_ion_count_per_sample <- base::as.data.frame(t(matrixStats::colMedians(as.matrix(decoy_ioncount),na.rm=T)))
  colnames(median_decoy_ion_count_per_sample) <- colnames(decoy_ioncount)

  max <- ncol(decoy_mean_intensity)
  pb <- tcltk::tkProgressBar(title = "Model background intensities per feature",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
  start_time <- Sys.time()
  updatecounter <- 0
  time_require <- 0

  for(c in 1:ncol(decoy_mean_intensity))
  {
    cur_sample <- colnames(decoy_mean_intensity)[c]
    if(c != 1)
    {
      background_intensity_GAM_table_per_feature <- cbind(background_intensity_GAM_table_per_feature,
                                                          base::data.frame(mean_intensity=stats::predict(fit_gam_per_sample[[cur_sample]], base::data.frame(RT = features$RT))))
      background_intensity_GAM_table_per_feature_sum <- cbind(background_intensity_GAM_table_per_feature_sum,
                                                              base::data.frame(mean_intensity=stats::predict(fit_gam_per_sample[[cur_sample]], base::data.frame(RT = features$RT))))
    }else
    {
      background_intensity_GAM_table_per_feature <- base::data.frame(mean_intensity=stats::predict(fit_gam_per_sample[[cur_sample]], base::data.frame(RT = features$RT)))
      background_intensity_GAM_table_per_feature_sum <- base::data.frame(mean_intensity=stats::predict(fit_gam_per_sample[[cur_sample]], base::data.frame(RT = features$RT)))
    }
    colnames(background_intensity_GAM_table_per_feature)[ncol(background_intensity_GAM_table_per_feature)] <- cur_sample
    colnames(background_intensity_GAM_table_per_feature_sum)[ncol(background_intensity_GAM_table_per_feature_sum)] <- cur_sample

    background_intensity_GAM_table_per_feature_sum[,c] <- stats::rnorm(n=nrow(background_intensity_GAM_table_per_feature_sum),mean = background_intensity_GAM_table_per_feature_sum[,c],sd = median_decoy_sd_per_sample[1,c])

    updatecounter <- updatecounter + 1
    if(updatecounter >= 1)
    {
      time_elapsed <- difftime(Sys.time(),start_time,units="secs")
      time_require <- (time_elapsed/(c/max))*(1-(c/max))
      td <- lubridate::seconds_to_period(time_require)
      time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

      updatecounter <- 0
      tcltk::setTkProgressBar(pb, c, label=base::paste( round(c/max*100, 0)," % done (",c,"/",max,", Time require: ",time_require,")",sep = ""))
    }
  }
  close(pb)

  sd_background_intensity <- median_decoy_sd_per_sample

  rownames(background_intensity_GAM_table_per_feature) <- features$Feature_name
  rownames(background_intensity_GAM_table_per_feature_sum) <- features$Feature_name

  QC_data[["Decoy_feature_parameters"]] <- list(Decoy_mean_intensity_per_RT = base::data.frame(RT_all=RT_all,
                                                                                         mean_intensity=x_all),
                                                GAM_model_average=fit_gam_mean,
                                                GAM_model_per_sample=fit_gam_per_sample,
                                                background_intensity_GAM_table_per_feature=background_intensity_GAM_table_per_feature,
                                                background_intensity_GAM_table_per_feature_sum=background_intensity_GAM_table_per_feature_sum,
                                                sd_background_intensity=sd_background_intensity,
                                                median_decoy_ion_count_per_sample=median_decoy_ion_count_per_sample,
                                                decoy_intensities=decoy_intensities,
                                                decoy_mean_intensity=decoy_mean_intensity,
                                                decoy_sd_intensity=decoy_sd_intensity,
                                                decoy_ioncount=decoy_ioncount)

  ###now select 1000 decoy features randomly for which we will perform peak selection and quantification
  decoy_features <- which(grepl("_d",features$Feature_name))
  if(length(decoy_features)>1000)
  {
    set.seed(1)
    select_decoys <- sample(decoy_features,1000)
    decoy_features_keep <- features[select_decoys,]
    ###now add isotope peaks of these decoy features
    isotope_features <- decoy_features_keep
    isotope_features$m.z <- ((isotope_features$m.z*isotope_features$Charge)+1.002054)/isotope_features$Charge
    delta_mz <- isotope_features$m.z_range_max-isotope_features$m.z_range_min
    isotope_features$m.z_range_max <- isotope_features$m.z+(delta_mz/2)
    isotope_features$m.z_range_min <- isotope_features$m.z-(delta_mz/2)
    isotope_features$Feature_name <- base::paste(isotope_features$Feature_name,"_i",sep="")
    decoy_features_keep <- rbind(decoy_features_keep,isotope_features)

  }else ##less than 1000 decoys so just continue
  {
    decoy_features_keep <- features[decoy_features,]
    ###now add isotope peaks of these decoy features
    isotope_features <- decoy_features_keep
    isotope_features$m.z <- ((isotope_features$m.z*isotope_features$Charge)+1.002054)/isotope_features$Charge
    delta_mz <- isotope_features$m.z_range_max-isotope_features$m.z_range_min
    isotope_features$m.z_range_max <- isotope_features$m.z+(delta_mz/2)
    isotope_features$m.z_range_min <- isotope_features$m.z-(delta_mz/2)
    isotope_features$Feature_name <- base::paste(isotope_features$Feature_name,"_i",sep="")
    decoy_features_keep <- rbind(decoy_features_keep,isotope_features)
  }
  ###remove all decoy features
  features <- features[-decoy_features,]
  ###add selected decoy features plus isotope decoy features
  features <- rbind(features,decoy_features_keep)

  ###see if already samples were converted
  samples <- mzXMLfiles
  samples <- base::substr(samples,1,regexpr("_all_ions.RData",samples)-1)
  Requant_samples <- base::gsub("RT_calibration\\.","",colnames(features)[which(grepl("RT_calibration\\.",colnames(features)))])
  Requant_samples <- base::gsub("\\.","-",Requant_samples)

  samples <- samples[which(samples %in% Requant_samples)]

  dir.create(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep=""),showWarnings = F)
  available <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep=""))
  available <- available[which(grepl("feature_quant.RData",available))]
  available <- base::gsub("_feature_quant.RData","",available)
  peak_min_ion_count <- grDevices::boxplot.stats(as.numeric(as.matrix(decoy_ioncount)))$stats[4]

  # return a warning if for some samples decoy features were already extracted
  if(length(which(samples %in% available))>0)
  {
    indx <- which(samples %in% available)
    options(warn=1)
    warning(paste0(Sys.time()," Feature intensities were already extracted for ",paste(samples[indx],collapse=",")," and will be used for subsequent IceR steps. If this is unintended because raw files, MaxQuant results, or IceR parameters were changed, please stop IceR now, delete the folder ",paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep="")," and restart IceR. If any of the previously mentioned changes were made, please consider removing the complete Temporary_files folder to enable a fresh run of IceR or at least Quantification_raw_results.RData."))
    options(warn=-1)
  }

  if(length(which(samples %not in% available))>0)
  {
    print(paste0(Sys.time()," Perform peak detection"))
    samples <- samples[which(samples %not in% available)]
    ##use decoy defined cut of to distinguish background intensity from signal intensity per feature
    extract_intensities_worker(Sample_IDs = as.character(samples),
                               features_select = features,
                               path_to_raw = base::paste(path_to_mzXML,"/all_ion_lists",sep=""),
                               path_to_output_folder = base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep=""),
                               RT_calibration=RT_calibration,
                               mz_calibration=mz_calibration,
                               peak_detection=peak_detection,
                               n_cores=ifelse(MassSpec_mode == "Orbitrap",n_cores,ifelse(n_cores >= 3,3,n_cores)),
                               ion_intensity_cutoff = T,
                               mean_background_ion_intensity_model = background_intensity_GAM_table_per_feature,
                               sd_background_ion_intensity = sd_background_intensity,
                               peak_min_ion_count=peak_min_ion_count,
                               kde_resolution = kde_resolution,
                               num_peaks_store = num_peaks_store,
                               plots = plot_peak_detection,
                               MassSpec_mode = MassSpec_mode,
                               use_IM_data=use_IM_data) ###define background peaks during 2DKDE as peaks with <= 75% quantile
    print(paste0(Sys.time()," Peak detection finished"))
  }

  #####Summarize data for all features and samples
  samples <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep=""))
  samples <- samples[which(grepl("feature_quant.RData",samples))]
  samples <- base::substr(samples,1,regexpr("_feature_quant.RData",samples)-1)

  ###Available sample data
  files <- list.files(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,sep=""))
  if(length(which(!grepl("feature_quant.RData",files)))>0)
  {
    files <- files[-which(!grepl("feature_quant.RData",files))]
  }

  features_select <- features

  features_intensity <- base::as.data.frame(matrix(ncol=length(samples),nrow=nrow(features_select)))#
  colnames(features_intensity) <- samples
  features_intensity <- sapply( features_intensity, as.numeric )
  features_intensity <- data.table::as.data.table(features_intensity)
  rownames(features_intensity) <- features_select$Feature_name
  Ioncount_feature_sample_matrix <- features_intensity

  feature_with_background_intensity <- features_intensity
  Ioncount_feature_with_background_intensity <- features_intensity

  ###prepare matrices to store data
  if(peak_detection == T)
  {
    peak_quant <- list()
    for(p in 1:(num_peaks_store+1))
    {
      features_intensity_temp <- features_intensity
      feature_with_background_intensity_temp <- feature_with_background_intensity
      Ioncount_feature_sample_matrix_temp <- Ioncount_feature_sample_matrix
      Ioncount_feature_with_background_intensity_temp <- Ioncount_feature_with_background_intensity
      #dT1_temp <- dT1
      #dM1_temp <- dM1
      Peak_rt_temp <- features_intensity
      Peak_mz_temp <- features_intensity
      num_peaks_temp <- features_intensity
      correct_peak_temp <- features_intensity
      Peak_rt_with_background_temp <- features_intensity
      Peak_mz_with_background_temp <- features_intensity
      num_peaks_with_background_temp <- features_intensity
      correct_peak_with_background_temp <- features_intensity
      peak_quant[[p]] <- list(features_intensity=features_intensity_temp,
                              feature_with_background_intensity=feature_with_background_intensity_temp,
                              Ioncount_feature_sample_matrix=Ioncount_feature_sample_matrix_temp,
                              Ioncount_feature_with_background_intensity=Ioncount_feature_with_background_intensity_temp,
                              #dT1=dT1_temp,
                              #dM1=dM1_temp,
                              Peak_rt=Peak_rt_temp,
                              Peak_mz=Peak_mz_temp,
                              num_peaks=num_peaks_temp,
                              correct_peak=correct_peak_temp,
                              Peak_rt_with_background=Peak_rt_with_background_temp,
                              Peak_mz_with_background=Peak_mz_with_background_temp,
                              num_peaks_with_background=num_peaks_with_background_temp,
                              correct_peak_with_background=correct_peak_with_background_temp)
    }
    names(peak_quant) <- c(base::paste("Peak_",1:(num_peaks_store),sep=""),"Standard")
  }

  max <- ncol(features_intensity)
  pb <- tcltk::tkProgressBar(title = "Merge quantification results",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
  start_time <- Sys.time()
  updatecounter <- 0
  time_require <- 0

  ###read the data
  for(c in 1:ncol(features_intensity))
  {
    if(peak_detection == F)
    {
      ###Now extract intensities, row1 = Intensity summed, row2 = num ions, row3 = mean intensity, row4 = sd intensity,
      ###row5 = detected optimum mz window minimum
      ###row6 = detected optimum mz window maximum
      ###row7 = detected optimum RT window minimum
      ###row8 = detected optimum RT window maximum
      ###row9 = number of detected peaks
      ###row10 = index which of the peaks should be the correct one (based on known observed RT for the respective feature)

      #load stored data into variable peaks_quant
      peaks_quant <- NULL
      load(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,"/",colnames(features_intensity)[c],"_feature_quant.RData",sep=""))

      signal=peaks_quant[[1]]$Intensities
      signal_background=peaks_quant[[1]]$Intensities_signal_background

      features_intensity[,c] <- signal[,1]
      Ioncount_feature_sample_matrix[,c] <- signal[,2]

      feature_with_background_intensity[,c] <- signal_background[,1]
      Ioncount_feature_with_background_intensity[,c] <- signal_background[,2]

    }else
    {
      #load stored data into variable peaks_quant
      load(base::paste(path_to_mzXML,"/all_ion_lists/Extracted feature intensities",output_file_names_add,"/",colnames(features_intensity)[c],"_feature_quant.RData",sep=""))

      for(p in 1:(num_peaks_store+1))
      {
        ###Now extract intensities, row1 = Intensity summed, row2 = num ions, row3 = mean intensity, row4 = sd intensity,
        ###row5 = detected optimum mz window minimum
        ###row6 = detected optimum mz window maximum
        ###row7 = detected optimum RT window minimum
        ###row8 = detected optimum RT window maximum
        ###row9 = number of detected peaks
        ###row10 = index which of the peaks should be the correct one (based on known observed RT for the respective feature)

        signal=peaks_quant[[p]]$Intensities
        signal_background=peaks_quant[[p]]$Intensities_signal_background

        peak_quant[[p]]$features_intensity[,c] <- signal[,1]
        peak_quant[[p]]$Ioncount_feature_sample_matrix[,c] <- signal[,2]
        peak_quant[[p]]$Peak_rt[,c] <- (signal[,7]+signal[,8])/2
        peak_quant[[p]]$Peak_mz[,c] <- (signal[,5]+signal[,6])/2
        peak_quant[[p]]$num_peaks[,c] <- signal[,9]
        peak_quant[[p]]$correct_peak[,c] <- signal[,10]

        peak_quant[[p]]$feature_with_background_intensity[,c] <- signal_background[,1]
        peak_quant[[p]]$Ioncount_feature_with_background_intensity[,c] <- signal_background[,2]
        peak_quant[[p]]$Peak_rt_with_background[,c] <- (signal_background[,7]+signal_background[,8])/2
        peak_quant[[p]]$Peak_mz_with_background[,c] <- (signal_background[,5]+signal_background[,6])/2
        peak_quant[[p]]$num_peaks_with_background[,c] <- signal_background[,9]
        peak_quant[[p]]$correct_peak_with_background[,c] <- signal_background[,10]

      }
    }
    updatecounter <- updatecounter + 1
    if(updatecounter >= 1)
    {
      time_elapsed <- difftime(Sys.time(),start_time,units="secs")
      time_require <- (time_elapsed/(c/max))*(1-(c/max))
      td <- lubridate::seconds_to_period(time_require)
      time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

      updatecounter <- 0
      tcltk::setTkProgressBar(pb, c, label=base::paste( round(c/max*100, 0)," % done (",c,"/",max,", Time require: ",time_require,")",sep = ""))
    }
  }
  close(pb)

  #if peak detection was performed, then check if peak selection was already done. If yes, load stored data, if not, perform peak selection
  if(file.exists(base::paste(path_to_features,"/Temporary_files/Quantification_raw_results.RData",sep="")))
  {
    options(warn=1)
    warning("Peak selection was already performed and stored results will be used. If this is unintended because Raw files, MaxQuant results, or IceR parameters were changed, please remove Quantification_raw_results.RData in Temporary_files folder or the complete Temporary_files folder to enable a fresh run of IceR.")
    options(warn=-1)

    load(base::paste(path_to_features,"/Temporary_files/Quantification_raw_results.RData",sep=""))
    crap <- gc(F)
  }else
  {
    print(paste0(Sys.time()," Perform peak selection"))
    ###if no peak detection was done, directly continue
    ###otherwise select per feature best peak
    if(peak_detection == T)
    {
      features_intensity <- as.matrix(features_intensity)
      Ioncount_feature_sample_matrix <- as.matrix(Ioncount_feature_sample_matrix)
      feature_with_background_intensity <- as.matrix(feature_with_background_intensity)
      Ioncount_feature_with_background_intensity <- as.matrix(Ioncount_feature_with_background_intensity)

      ###prepare matrix in which we store decisions which peaks where used for quantification
      peak_selected <- matrix(ncol=ncol(feature_with_background_intensity),nrow=nrow(feature_with_background_intensity))
      colnames(peak_selected) <- colnames(feature_with_background_intensity)

      ###transfer all quantification data from Standard for quantifications where no peak was detected
      selection <- which(is.na(as.matrix(peak_quant$Standard$num_peaks_with_background)) & is.na(as.matrix(peak_quant$Peak_1$num_peaks_with_background)))
      features_intensity[selection] <- as.matrix(peak_quant$Standard$features_intensity)[selection]
      Ioncount_feature_sample_matrix[selection] <- as.matrix(peak_quant$Standard$Ioncount_feature_sample_matrix)[selection]
      feature_with_background_intensity[selection] <- as.matrix(peak_quant$Standard$feature_with_background_intensity)[selection]
      Ioncount_feature_with_background_intensity[selection] <- as.matrix(peak_quant$Standard$Ioncount_feature_with_background_intensity)[selection]
      peak_selected[selection] <- num_peaks_store+1 ###standard window

      ###next, transfer all quantification data from Peak1 for quantifications where correct peak is known
      selection <- which(as.matrix(peak_quant$Peak_1$correct_peak_with_background == 1))
      features_intensity[selection] <- as.matrix(peak_quant$Peak_1$features_intensity)[selection]
      Ioncount_feature_sample_matrix[selection] <- as.matrix(peak_quant$Peak_1$Ioncount_feature_sample_matrix)[selection]
      feature_with_background_intensity[selection] <- as.matrix(peak_quant$Peak_1$feature_with_background_intensity)[selection]
      Ioncount_feature_with_background_intensity[selection] <- as.matrix(peak_quant$Peak_1$Ioncount_feature_with_background_intensity)[selection]
      peak_selected[selection] <- 1 ###closest peak = peak 1

      ###if no peak is know, change from NA to 0
      peak_quant$Peak_1$correct_peak[is.na(peak_quant$Peak_1$correct_peak_with_background)] <- 0
      peak_quant$Peak_1$correct_peak_with_background[is.na(peak_quant$Peak_1$correct_peak_with_background)] <- 0

      features_intensity <- base::as.data.frame(features_intensity)
      Ioncount_feature_sample_matrix <- base::as.data.frame(Ioncount_feature_sample_matrix)
      feature_with_background_intensity <- base::as.data.frame(feature_with_background_intensity)
      Ioncount_feature_with_background_intensity <- base::as.data.frame(Ioncount_feature_with_background_intensity)
      peak_selected <- base::as.data.frame(peak_selected)

      ###For the next step of peak quantifications where true peak is not known we will need RT and mz correction factors per feature
      ###Extract RT and mz correction factors per sample and feature
      indices_RT_correction <- which(grepl("RT_calibration",colnames(features_select)))
      indices_mz_correction <- which(grepl("mz_calibration",colnames(features_select)))
      ordering_indices <- match(base::gsub("-",".",samples),base::gsub("RT_calibration\\.","",colnames(features_select)[indices_RT_correction]))
      indices_RT_correction <- indices_RT_correction[ordering_indices]
      indices_mz_correction <- indices_mz_correction[ordering_indices]

      RT_correction_factors <- features_select[,indices_RT_correction]
      mz_correction_factors <- features_select[,indices_mz_correction]
      colnames(RT_correction_factors) <- samples
      rownames(RT_correction_factors) <- features_select$Feature_name
      colnames(mz_correction_factors) <- samples
      rownames(mz_correction_factors) <- features_select$Feature_name

      ###general RT and mz windows
      delta_mz <- stats::median(features_select$m.z - features_select$m.z_range_min,na.rm=T)
      delta_rt <- stats::median(features_select$RT - features_select$RT_range_min,na.rm=T)/2

      ###split up quant results into chunks of 50000 features
      ###perform individually peak selection per chunk
      ###combine results afterwards again
      ###purpose: keep peak quant file size low otherwise memory can be quickly overloaded in case of a large experiment
      chunk_size <- 50000
      num_chunks <- ceiling(nrow(features)/chunk_size)
      for(chunk in 1:num_chunks)
      {
        start <- (chunk-1)*chunk_size+1
        end <- (chunk)*chunk_size
        if(end > nrow(features))end <- nrow(features)

        features_temp <- features_select[start:end,]

        peak_quant_temp <- peak_quant
        for(p in 1:(num_peaks_store+1))
        {
          for(p2 in 1:length(peak_quant_temp[[p]]))
          {
            peak_quant_temp[[p]][[p2]] <- peak_quant_temp[[p]][[p2]][start:end,]
          }
        }

        ######perform peak decision
        #suppressWarnings(suppressMessages(library(doParallel,quietly = T)))

        cl <- parallel::makeCluster(n_cores)
        doParallel::registerDoParallel(cl)

        res <- foreach::foreach(s=1:length(samples)) %dopar%
          {
            peak_decision(features_select = features_temp,
                          peak_quant = peak_quant_temp,
                          samples,
                          s = s,
                          RT_correction_factors = RT_correction_factors[start:end,],
                          mz_correction_factors = mz_correction_factors[start:end,],
                          features_intensity_sample = features_intensity[start:end,s,drop=F],
                          Ioncount_sample = Ioncount_feature_sample_matrix[start:end,s,drop=F],
                          feature_with_background_intensity_sample = feature_with_background_intensity[start:end,s,drop=F],
                          Ioncount_with_background_sample = Ioncount_feature_with_background_intensity[start:end,s,drop=F],
                          peak_selected_sample = peak_selected[start:end,s,drop=F],
                          delta_mz = delta_mz,
                          delta_rt = delta_rt,
                          peak_min_ion_count=peak_min_ion_count,
                          chunk=chunk,
                          num_chunks=num_chunks)
          }
        parallel::stopCluster(cl)

        ###merge results from all threads
        for(c in 1:length(samples))
        {
          data.table::set(features_intensity,as.integer(start:end),c,res[[c]]$features_intensity_sample)
          data.table::set(Ioncount_feature_sample_matrix,as.integer(start:end),c,res[[c]]$Ioncount_sample)
          data.table::set(feature_with_background_intensity,as.integer(start:end),c,res[[c]]$feature_with_background_intensity_sample)
          data.table::set(Ioncount_feature_with_background_intensity,as.integer(start:end),c,res[[c]]$Ioncount_with_background_sample)
          data.table::set(peak_selected,as.integer(start:end),c,res[[c]]$peak_selected_sample)
        }

      }

      rownames(peak_selected) <- features$Feature_name
    }else
    {
      peak_selected <- NULL
    }

    print(paste0(Sys.time()," Peak selection finished"))

    rownames(features_intensity) <- features$Feature_name
    rownames(Ioncount_feature_sample_matrix) <- features$Feature_name
    rownames(feature_with_background_intensity) <- features$Feature_name
    rownames(Ioncount_feature_with_background_intensity) <- features$Feature_name

    setwd(path_to_features)
    dir.create("Temporary_files")
    setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
    print(paste0(Sys.time()," Save peak detection and selection results"))
    save(features,
         features_intensity,
         Ioncount_feature_sample_matrix,
         feature_with_background_intensity,
         Ioncount_feature_with_background_intensity,
         peak_selected,
         peak_quant,
         QC_data,
         path_to_features,
         path_to_MaxQ_output,
         samples,
         delta_rt,
         delta_mz,
         peak_min_ion_count,
         num_peaks_store,
         RT_calibration,
         mz_calibration,
         abundance_estimation_correction,
         Quant_pVal_cut,
         alignment_variability_score_cutoff,
         alignment_scores_cutoff,
         mono_iso_alignment_cutoff,
         calc_peptide_LFQ,
         calc_protein_LFQ,
         MassSpec_mode,
         file = "Quantification_raw_results.RData")

    crap <- gc(F)
  }

  ###read previously generated outputs
  #load("Quantification_raw_results.RData")
  print(paste0(Sys.time()," Perform scoring of alignment and quantification"))

  setwd(path_to_features)

  grDevices::pdf("Temporary_files/Alignment and quantification scores.pdf")

  features$target_decoy <- ifelse(grepl("_d",features$Feature_name),"decoy","target")
  ###Tag decoy features which overlap with real features
  remove_decoy_outlier <- vector(mode="logical",length=length(which(features$target_decoy == "decoy")))
  features_target <- features[which(features$target_decoy == "target"),]
  decoy_indices <- which(features$target_decoy == "decoy")

  max <- length(decoy_indices)
  pb <- tcltk::tkProgressBar(title = "Detect target-decoy overlapping features",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
  start_time <- Sys.time()
  updatecounter <- 0
  time_require <- 0
  for(i in 1:length(decoy_indices))
  {
    index <- decoy_indices[i]
    overlap <- which(features_target$m.z_range_min > features$m.z_range_min[index] & features_target$m.z_range_min < features$m.z_range_max[index] |
                       features_target$m.z_range_max > features$m.z_range_min[index] & features_target$m.z_range_max < features$m.z_range_max[index])

    overlap <- which(features_target$RT_range_min[overlap] > features$RT_range_min[index] & features_target$RT_range_min[overlap] < features$RT_range_max[index] |
                       features_target$RT_range_max[overlap] > features$RT_range_min[index] & features_target$RT_range_max[overlap] < features$RT_range_max[index])
    if(length(overlap)>0)
    {
      remove_decoy_outlier[i] <- T
    }

    updatecounter <- updatecounter + 1
    if(updatecounter >= 10)
    {
      time_elapsed <- difftime(Sys.time(),start_time,units="secs")
      time_require <- (time_elapsed/(i/max))*(1-(i/max))
      td <- lubridate::seconds_to_period(time_require)
      time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

      updatecounter <- 0
      tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
    }
  }
  close(pb)
  features$overlapping_decoys <- F
  features$overlapping_decoys[decoy_indices] <- remove_decoy_outlier

  ###t-test based determination of background quantifications
  ###determine if ion count significantly deviates from decoy feature ion counts
  ###perform this for signal intensities and background+signal intensities
  #Signal+Background quantification

  decoy_ion_count <- as.numeric(as.matrix(Ioncount_feature_with_background_intensity[which(features$target_decoy == "decoy" & !grepl("_d_i",features$Feature_name) & features$overlapping_decoys == F),]))
  decoy_ion_count[is.na(decoy_ion_count)] <- 0

  ###add a count of 1 for log scale
  decoy_ion_count <- decoy_ion_count + 1

  ###determine mean and sd of log2 transformed decoy ion counts for z scoring
  mean_decoy_count <- mean(base::log2(decoy_ion_count))
  sd_decoy_count <- stats::sd(base::log2(decoy_ion_count))

  ###Prepare for testing if observed ion counts of target features is significantly higher than for decoy ions (just random peak selection)
  target_ion_counts <- Ioncount_feature_with_background_intensity
  rows <- nrow(target_ion_counts)
  rownames(target_ion_counts) <- rownames(Ioncount_feature_with_background_intensity)
  target_ion_counts <- as.matrix(target_ion_counts)
  ###add 1 for log scale
  target_ion_counts <- target_ion_counts + 1
  zscores <- base::as.data.frame((base::log2(target_ion_counts)-mean_decoy_count)/sd_decoy_count)

  zscore_to_pval <- function(z, alternative="greater")
  {

    if(alternative == "greater")
    {
      pval = stats::pnorm(-z)
    }else if(alternative == "two.sided")
    {
      pval = stats::pnorm(-abs(z))
      pval = 2 * pval
    }else if(alternative=="less")
    {
      pval = stats::pnorm(z)
    }
    return(pval)
  }

  ###perform significance test
  pval_signal_with_background_quant <- base::as.data.frame(apply(zscores,1:2,zscore_to_pval))
  temp_pval_quant <- pval_signal_with_background_quant
  temp_pval_quant[is.na(temp_pval_quant)] <- 1

  QC_data[["Decoy_ion_counts"]] <- decoy_ion_count
  QC_data[["Target_ion_counts"]] <- target_ion_counts
  QC_data[["Quant_pval"]] <- pval_signal_with_background_quant

  ###plot pvalue of quantification per sample for target features
  if(ncol(temp_pval_quant) <= 15) ###only 15 samples can be plotted in one plot
  {
    graphics::par(mar=c(10,4,4,4))
    graphics::boxplot(-log10(temp_pval_quant[which(!grepl("_d",rownames(temp_pval_quant))),]),outline=F,main="Quantification pVals per target feature",ylab="pValue, -log10",names=colnames(temp_pval_quant),las=2)
    graphics::abline(h=-log10(Quant_pVal_cut),lty=2,col="red")
  }else
  {
    graphics::par(mar=c(10,4,4,4))
    pages <- ceiling(ncol(temp_pval_quant)/15)
    for(p in 1:pages)
    {
      columns <- (((p-1)*15)+1):(p*15)
      columns <- columns[which(columns <= ncol(temp_pval_quant))]
      graphics::boxplot(-log10(temp_pval_quant[which(!grepl("_d",rownames(temp_pval_quant))),columns]),outline=F,main="Quantification pVals per target feature",ylab="pValue, -log10",names=colnames(temp_pval_quant)[columns],las=2)
      graphics::abline(h=-log10(Quant_pVal_cut),lty=2,col="red")
    }
  }

  ###plot pvalue of quantification per sample for decoy features
  if(ncol(temp_pval_quant) <= 15) ###only 15 samples can be plotted in one plot
  {
    graphics::par(mar=c(10,4,4,4))
    graphics::boxplot(-log10(temp_pval_quant[which(grepl("_d$",rownames(temp_pval_quant))),]),outline=F,main="Quantification pVals per decoy feature",ylab="pValue, -log10",names=colnames(temp_pval_quant),las=2)
    graphics::abline(h=-log10(Quant_pVal_cut),lty=2,col="red")
  }else
  {
    graphics::par(mar=c(10,4,4,4))
    pages <- ceiling(ncol(temp_pval_quant)/15)
    for(p in 1:pages)
    {
      columns <- (((p-1)*15)+1):(p*15)
      columns <- columns[which(columns <= ncol(temp_pval_quant))]
      graphics::boxplot(-log10(temp_pval_quant[which(grepl("_d$",rownames(temp_pval_quant))),columns]),outline=F,main="Quantification pVals per decoy feature",ylab="pValue, -log10",names=colnames(temp_pval_quant)[columns],las=2)
      graphics::abline(h=-log10(Quant_pVal_cut),lty=2,col="red")
    }
  }

  ###plot per sample how many target feature quantifications were significant and not significant
  plot_data <- rbind(colSums(temp_pval_quant[which(!grepl("_d|_i",rownames(temp_pval_quant))),] < Quant_pVal_cut,na.rm=T),
                     colSums(temp_pval_quant[which(!grepl("_d|_i",rownames(temp_pval_quant))),] > Quant_pVal_cut,na.rm=T))
  rownames(plot_data) <- c("certain","uncertain")

  if(ncol(plot_data) <= 15) ###only 15 samples can be plotted in one plot
  {
    p <- Barplotsstacked(plot_data,AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(plot_data),main="Certainty of ion accumulation for target features",ylab="Count",ylim=c(0,max(colSums(plot_data,na.rm=T))),Legendtitle = "Accumulation")
  }else ###if more samples, split samples up
  {
    pages <- ceiling(ncol(plot_data)/15)
    for(p in 1:pages)
    {
      columns <- (((p-1)*15)+1):(p*15)
      columns <- columns[which(columns <= ncol(plot_data))]
      p <- Barplotsstacked(plot_data[,columns],AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(plot_data),main="Certainty of ion accumulation for target features",ylab="Count",ylim=c(0,max(colSums(plot_data,na.rm=T))),Legendtitle = "Accumulation")
    }
  }

  ###plot per sample how many decoy feature quantifications were significant and not significant
  plot_data <- rbind(colSums(temp_pval_quant[which(grepl("_d$",rownames(temp_pval_quant))),] < Quant_pVal_cut,na.rm=T),
                     colSums(temp_pval_quant[which(grepl("_d$",rownames(temp_pval_quant))),] > Quant_pVal_cut,na.rm=T))
  rownames(plot_data) <- c("certain","uncertain")

  if(ncol(plot_data) <= 15) ###only 15 samples can be plotted in one plot
  {
    p <- Barplotsstacked(plot_data,AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(plot_data),main="Certainty of ion accumulation for decoy features",ylab="Count",ylim=c(0,max(colSums(plot_data,na.rm=T))),Legendtitle = "Accumulation")
  }else ###if more samples, split samples up
  {
    pages <- ceiling(ncol(plot_data)/15)
    for(p in 1:pages)
    {
      columns <- (((p-1)*15)+1):(p*15)
      columns <- columns[which(columns <= ncol(plot_data))]
      p <- Barplotsstacked(plot_data[,columns],AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(plot_data),main="Certainty of ion accumulation for decoy features",ylab="Count",ylim=c(0,max(colSums(plot_data,na.rm=T))),Legendtitle = "Accumulation")
    }
  }


  ###Boxplot per sample ratio of feature signal/background
  features_background_intensity <- log10(10^feature_with_background_intensity-10^features_intensity)
  S2B <- base::log2(10^features_intensity/10^features_background_intensity)
  S2B[is.infinite(as.matrix(S2B))] <- NA
  S2B <- base::as.data.frame(S2B)
  rownames(S2B) <- features$Feature_name

  QC_data[["Signal_to_background_target_decoy"]] <- list(Target_Decoy=ifelse(grepl("_d",rownames(S2B)),"decoy","target"),
                                                         S2B=S2B)

  if(ncol(S2B) <= 15) ###only 15 samples can be plotted in one plot
  {
    graphics::par(mar=c(10,4,4,4))
    graphics::boxplot(S2B[which(!grepl("_d",rownames(S2B))),],outline=F,main="Signal to background ratio per feature quantification",ylab="Signal/Background, log2",names=colnames(S2B),las=2)
    graphics::abline(h=0,lty=2,col="red")
    for(co in 1:ncol(S2B))
    {
      med <- stats::median(S2B[which(!grepl("_d",rownames(S2B))),co],na.rm=T)
      graphics::text(co,med+((graphics::par("usr")[4]-graphics::par("usr")[3])*0.05),round(med,digits=1))
    }

  }else
  {
    graphics::par(mar=c(10,4,4,4))
    pages <- ceiling(ncol(S2B)/15)
    for(p in 1:pages)
    {
      columns <- (((p-1)*15)+1):(p*15)
      columns <- columns[which(columns <= ncol(S2B))]
      graphics::boxplot(S2B[which(!grepl("_d",rownames(S2B))),columns],outline=F,main="Signal to background ratio per feature quantification",ylab="Signal/Background, log2",names=colnames(S2B)[columns],las=2)
      graphics::abline(h=0,lty=2,col="red")
      for(co in 1:length(columns))
      {
        med <- stats::median(S2B[which(!grepl("_d",rownames(S2B))),columns[co]],na.rm=T)
        graphics::text(co,med+((graphics::par("usr")[4]-graphics::par("usr")[3])*0.05),round(med,digits=1))
      }
    }
  }

  pval_signal_with_background_quant <- base::as.data.frame(pval_signal_with_background_quant)
  features_intensity <- as.matrix(features_intensity)
  feature_with_background_intensity <- as.matrix(feature_with_background_intensity)
  features_intensity[is.infinite(features_intensity)] <- NA
  feature_with_background_intensity[is.infinite(feature_with_background_intensity)] <- NA
  features_intensity <- base::as.data.frame(features_intensity)
  feature_with_background_intensity <- base::as.data.frame(feature_with_background_intensity)

  ###calculate some scores which give insight in confidence of quantifications (selection of right peaks)

  #arrange selected peaks results in a long table

  peaks <- base::data.frame(RT=unlist(sapply(peak_quant, function(x) x$Peak_rt_with_background)),
                      mz=unlist(sapply(peak_quant, function(x) x$Peak_mz_with_background)),
                      known=unlist(sapply(peak_quant, function(x) x$correct_peak_with_background)),
                      ion_count=unlist(sapply(peak_quant, function(x) x$Ioncount_feature_with_background_intensity)),
                      intensity=unlist(sapply(peak_quant, function(x) x$feature_with_background_intensity)))
  peaks$sample <- rep(sort(rep(samples,nrow(features))),length(peak_quant))
  peaks$feature <- rep(features$Feature_name,length(peak_quant)*length(samples))
  peaks$peak <- sort(rep(1:length(peak_quant),nrow(features)*length(samples)))
  peaks$known[is.na(peaks$known)] <- 0

  #add information which peak was finally used
  peaks$selected <- NA
  for(s in samples)
  {
    selection_sample <- which(peaks$sample == s)
    matching_indices <- match(peaks$feature[selection_sample],rownames(peak_selected))
    peaks$selected[selection_sample] <- peak_selected[matching_indices,s]
  }

  #add corrected RT and mz per entry (eliminate expected deviations between samples)
  peaks$RT_correct <- NA
  peaks$mz_correct <- NA
  peaks$RT_correct <- as.numeric(peaks$RT_correct)
  peaks$mz_correct <- as.numeric(peaks$mz_correct)
  for(s in 1:length(samples))
  {
    corrections <- features[,c(1,which(grepl(base::paste("_calibration\\.",base::gsub("-",".",samples[s]),"$",sep=""),colnames(features))))]
    selection_peaks <- which(peaks$sample == samples[s])
    RT_corrected <- peaks$RT[selection_peaks]-corrections[match(peaks$feature[selection_peaks],corrections$Feature_name),2]
    mz_corrected <- peaks$mz[selection_peaks]-corrections[match(peaks$feature[selection_peaks],corrections$Feature_name),3]
    data.table::set(peaks,as.integer(selection_peaks),c(10L,11L),value=list(RT_corrected,mz_corrected))
  }


  #calculate general peak variability score using delta_rt and delta_mz as sd for zscoring
  ###feature with significant score here should be potentially excluded
  Variability_alignment_scoring <- function(features,peaks,delta_rt,delta_mz,alignment_variability_score_cutoff=0.05,plot=T)
  {
    ###Convert z scores (vector or matrix) to pvalues
    zscore_to_pval <- function(z, alternative="greater")
    {

      if(alternative == "greater")
      {
        pval = stats::pnorm(-z)
      }else if(alternative == "two.sided")
      {
        pval = stats::pnorm(-abs(z))
        pval = 2 * pval
      }else if(alternative=="less")
      {
        pval = stats::pnorm(z)
      }
      return(pval)
    }
    #select peaks which were selected by peak selection
    temp <- peaks[which(peaks$peak == peaks$selected),]
    #calculate interquartile range per feature over samples for RT and mz
    temp_RT <- stats::aggregate(temp$RT,by=list(Feature=temp$feature),FUN=matrixStats::iqr,na.rm=T)
    temp_mz <- stats::aggregate(temp$mz,by=list(Feature=temp$feature),FUN=matrixStats::iqr,na.rm=T)

    #z score using delta RT and delta mz as sd
    temp_RT$x <- (temp_RT$x-mean(temp_RT$x,na.rm=T))/delta_rt
    temp_mz$x <- (temp_mz$x-mean(temp_mz$x,na.rm=T))/delta_mz
    ##features with a negative z-score represent features with much lower deviation between samples than expected
    ##features with a positive z-score represent features with much higher deviation between samples than expected --> have to be inspected and eventually removed
    #graphics::boxplot(temp_mz$x,main="General mz-deviation of selected peaks between samples",ylab="Z-score - mz deviation",ylim=c(-4,4))
    #graphics::boxplot(temp_RT$x,main="General RT-deviation of selected peaks between samples",ylab="Z-score - RT deviation",ylim=c(-4,4))

    alignment_variability_score <- base::data.frame(RT_variability_pval=(zscore_to_pval(temp_RT$x)),
                                              mz_variability_pval=(zscore_to_pval(temp_mz$x)))

    alignment_variability_score <- alignment_variability_score[match(features$Feature_name,temp_RT$Feature),]
    rownames(alignment_variability_score) <- features$Feature_name
    alignment_variability_score <- base::as.data.frame(alignment_variability_score)
    alignment_variability_score <- cbind(alignment_variability_score,
                                         matrixStats::rowMins(as.matrix(alignment_variability_score),na.rm=T))
    colnames(alignment_variability_score)[3] <- "combined_variability_pval"

    #add some plots showing number of features with general high variability between samples
    if(plot == T)
    {
      temp <- matrixStats::rowMins(as.matrix(alignment_variability_score[which(!grepl("_d|_i",rownames(alignment_variability_score))),]),na.rm=T)
      Variability_scores <- cbind(length(which(temp > alignment_variability_score_cutoff)),
                                  length(which(temp < alignment_variability_score_cutoff)))
      colnames(Variability_scores) <- c("normal","high")

      p <- BarplotsSBS(Variability_scores,AvgLine = F,col=c("chocolate","grey"),shownumbers = T,main="Variability in peak selection between samples",ylab="Count")

    }

    return(alignment_variability_score)
  }

  alignment_variability_score <- Variability_alignment_scoring(features,peaks,delta_rt,delta_mz,alignment_variability_score_cutoff)

  #calculate standardized (z score) deviation of peak RT/mz from mean over all samples with used sd defined by delta_RT and delta_mz
  #z scores are then translated into two sided pvalues and per feature/sample the minimum of pval of RT or mz is reported
  Alignment_scoring <- function(peaks,features_select,samples,sd_RT=0.5,sd_mz=0.001,corrected_alignment=T,alignment_scores_cutoff=0.05,plot=T)
  {
    ####determine for every sample if detected peak RT or mz is significantly deviating from all other determined peak RT or mz
    #extract only finally used peaks
    temp_peaks <- peaks[which(peaks$peak == peaks$selected),]
    #order according to feature name
    temp_peaks <- temp_peaks[order(temp_peaks$feature,temp_peaks$sample),]

    #prepare matrices which should store RT and mz of selected peaks per sample and feature
    if(corrected_alignment == T)selected_peaks_RT <- stats::reshape(temp_peaks[,c("feature","sample","RT_correct")], idvar = "feature", timevar = "sample", direction = "wide")
    if(corrected_alignment == F)selected_peaks_RT <- stats::reshape(temp_peaks[,c("feature","sample","RT")], idvar = "feature", timevar = "sample", direction = "wide")
    rownames(selected_peaks_RT) <- selected_peaks_RT$feature
    selected_peaks_RT <- selected_peaks_RT[,-1]
    if(corrected_alignment == T)selected_peaks_mz <- stats::reshape(temp_peaks[,c("feature","sample","mz_correct")], idvar = "feature", timevar = "sample", direction = "wide")
    if(corrected_alignment == F)selected_peaks_mz <- stats::reshape(temp_peaks[,c("feature","sample","mz")], idvar = "feature", timevar = "sample", direction = "wide")
    rownames(selected_peaks_mz) <- selected_peaks_mz$feature
    selected_peaks_mz <- selected_peaks_mz[,-1]

    #calculate mean and sd of RT and mz per feature
    selected_peaks_distributions_RT <- base::data.frame(mean=rowMeans(as.matrix(selected_peaks_RT),na.rm=T),
                                                  sd=matrixStats::rowSds(as.matrix(selected_peaks_RT,na.rm=T)))
    selected_peaks_distributions_mz <- base::data.frame(mean=rowMeans(as.matrix(selected_peaks_mz),na.rm=T),
                                                  sd=matrixStats::rowSds(as.matrix(selected_peaks_mz,na.rm=T)))
    #convert observed RTs and mzs per feature and sample into z-scores
    selected_peaks_RT_zscore <- (selected_peaks_RT-selected_peaks_distributions_RT$mean)/sd_RT
    selected_peaks_mz_zscore <- (selected_peaks_mz-selected_peaks_distributions_mz$mean)/sd_mz
    #convert zscores into pvalues
    zscore_to_pval <- function(z)
    {
      pval = stats::pnorm(-abs(z))
      pval = 2 * pval
      return(pval)
    }
    selected_peaks_RT_pval <- base::as.data.frame(apply(selected_peaks_RT_zscore,1:2,zscore_to_pval))
    selected_peaks_mz_pval <- base::as.data.frame(apply(selected_peaks_mz_zscore,1:2,zscore_to_pval))
    if(corrected_alignment == T)colnames(selected_peaks_RT_pval) <- base::gsub("RT_correct.","",colnames(selected_peaks_RT_pval))
    if(corrected_alignment == T)colnames(selected_peaks_mz_pval) <- base::gsub("mz_correct.","",colnames(selected_peaks_mz_pval))
    if(corrected_alignment == F)colnames(selected_peaks_RT_pval) <- base::gsub("RT.","",colnames(selected_peaks_RT_pval))
    if(corrected_alignment == F)colnames(selected_peaks_mz_pval) <- base::gsub("mz.","",colnames(selected_peaks_mz_pval))
    #reorder rows to match to ordering in features_select
    selected_peaks_RT_pval <- selected_peaks_RT_pval[match(features_select$Feature_name,rownames(selected_peaks_RT_pval)),]
    selected_peaks_mz_pval <- selected_peaks_mz_pval[match(features_select$Feature_name,rownames(selected_peaks_mz_pval)),]
    #summarize alignment score per feature and sample to single minimal pval (either RT or mz)
    alignment_score_peaks <- pmin(selected_peaks_RT_pval,selected_peaks_mz_pval)

    ###add some plots about for how many features we are certain/uncertain about the peak selection
    if(plot == T)
    {
      temp_score <- alignment_score_peaks[which(!grepl("_d|_d_i",rownames(alignment_score_peaks))),]
      temp_score <- rbind(colSums(temp_score > alignment_scores_cutoff,na.rm=T),
                          colSums(temp_score < alignment_scores_cutoff,na.rm=T))
      rownames(temp_score) <- c("certain","uncertain")

      main <- ifelse(corrected_alignment == T,"Certainty of peak selection (corrected)","Certainty of peak selection (raw)")

      if(ncol(temp_score) <= 15) ###only 15 samples can be plotted in one plot
      {
        p <- Barplotsstacked(temp_score,AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(temp_score),main=main,ylab="Count",ylim=c(0,max(colSums(temp_score,na.rm=T))),Legendtitle = "Peak selection")
      }else ###if more samples, split samples up
      {
        pages <- ceiling(ncol(temp_score)/15)
        for(p in 1:pages)
        {
          columns <- (((p-1)*15)+1):(p*15)
          columns <- columns[which(columns <= ncol(temp_score))]
          p <- Barplotsstacked(temp_score[,columns],AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(temp_score),main=main,ylab="Count",ylim=c(0,max(colSums(temp_score,na.rm=T))),Legendtitle = "Peak selection")
        }
      }
    }

    return(alignment_score_peaks)
  }
  #perform alignment scoring based on corrected mz and RT per feature/sample
  alignment_scores_peaks_correct <- Alignment_scoring(peaks,features,samples,sd_RT = delta_rt,sd_mz = delta_mz,corrected_alignment = T,alignment_scores_cutoff=alignment_scores_cutoff)
  #perform alignment scoring based on raw mz and RT per feature/sample to detect unexpected complete outliers
  alignment_scores_peaks_raw <- Alignment_scoring(peaks,features,samples,sd_RT = delta_rt,sd_mz = delta_mz,corrected_alignment = F,alignment_scores_cutoff=alignment_scores_cutoff)

  ###Calculate score based on cooccurence of isotope peaks
  if(any(grepl("_i",features$Feature_name) & !grepl("_d_i",features$Feature_name)))
  {
    Mono_iso_alignment_scoring <- function(features,samples,peaks,pval_signal_with_background_quant,delta_rt,delta_mz,mono_iso_alignment_cutoff,plot=T)
    {
      #prepare dataframe into which all results are saved
      mono_iso_alignment_summary <- base::as.data.frame(matrix(nrow=nrow(features),ncol=length(samples)))
      rownames(mono_iso_alignment_summary) <- features$Feature_name
      for(c in 1:ncol(mono_iso_alignment_summary))
      {
        mono_iso_alignment_summary[,c] <- as.numeric(mono_iso_alignment_summary[,c])
      }

      if(length(which(peaks$known == 1)) >= 10)
      {
        zscore_to_pval <- function(z, alternative="greater")
        {

          if(alternative == "greater")
          {
            pval = stats::pnorm(-z)
          }else if(alternative == "two.sided")
          {
            pval = stats::pnorm(-abs(z))
            pval = 2 * pval
          }else if(alternative=="less")
          {
            pval = stats::pnorm(z)
          }
          return(pval)
        }

        ###estimate expected distributions of deviations in RT and mz between M and M+1 peaks
        mean_RT_distribution <- vector("numeric",length(samples))
        sd_RT_distribution <- vector("numeric",length(samples))
        mean_mz_distribution <- vector("numeric",length(samples))
        sd_mz_distribution <- vector("numeric",length(samples))
        for(s in 1:length(samples))
        {
          temp <- peaks[which(peaks$sample == samples[s]),]
          #add quantification scores
          temp$quant_score <- pval_signal_with_background_quant[match(temp$feature,rownames(pval_signal_with_background_quant)),s]
          #add charge state
          temp$charge <- features$Charge[match(features$Feature_name,temp$feature)]
          #selected peaks RT and mz
          temp_selected <- temp[which(temp$peak == temp$selected),]
          #split features into mono and +1 isotopes
          temp_selected_mono <- temp_selected[which(!grepl("_i",temp_selected$feature)),]
          temp_selected_iso <- temp_selected[which(grepl("_i",temp_selected$feature)),]
          temp_selected_iso$feature <- base::gsub("_i","",temp_selected_iso$feature)
          #combine both and only keep features for which we also have +1 isotope features
          combined <- dplyr::inner_join(temp_selected_mono,temp_selected_iso,by="feature")
          #determine expected variation between mono and +1 isotopes based on true identifications and significantly quantified
          #mono and +1 isotope features
          true <- combined[which(combined$known.x == 1 & combined$quant_score.x < 0.05 & combined$quant_score.y < 0.05),]
          true$delta_rt_iso_mono <- true$RT.y-true$RT.x
          true$delta_mz_iso_mono <- (((true$mz.y*true$charge.y)-1.002054)/true$charge.y)-true$mz.x

          mean_RT_distribution[s] <- mean(true$delta_rt_iso_mono,na.rm=T)
          sd_RT_distribution[s] <- stats::sd(true$delta_rt_iso_mono,na.rm=T)
          mean_mz_distribution[s] <- mean(true$delta_mz_iso_mono,na.rm=T)
          sd_mz_distribution[s] <- stats::sd(true$delta_mz_iso_mono,na.rm=T)
        }
        #estimate normal distribution of deviations in RT and mz between mono and +1 isotope peak
        RT_deviation_norm <- base::data.frame(x=seq(-delta_rt*3, delta_rt*3, length=1000),
                                        y=stats::dnorm(seq(-delta_rt*3, delta_rt*3, length=1000), mean=mean(mean_RT_distribution,na.rm=T), sd=mean(sd_RT_distribution,na.rm=T)))
        mz_deviation_norm <- base::data.frame(x=seq(-delta_mz*3,delta_mz*3, length=1000),
                                        y=stats::dnorm(seq(-delta_mz*3,delta_mz*3, length=1000), mean=mean(mean_mz_distribution,na.rm=T), sd=mean(sd_mz_distribution,na.rm=T)))
        graphics::plot(RT_deviation_norm$x,RT_deviation_norm$y,type="l",xlab="RT(M+1) - RT(M)",main="RT-deviation M vs M+1 peaks - true quantifications",ylab="Density")
        graphics::abline(v=mean(true$delta_rt_iso_mono),lty=2)
        graphics::plot(mz_deviation_norm$x,mz_deviation_norm$y,type="l",xlab="mz(M+1) - mz(M)",main="mz-deviation M vs M+1 peaks - true quantifications",ylab="Density")
        graphics::abline(v=mean(true$delta_mz_iso_mono),lty=2)
        #now calculate for every available feature for which an isotope feature is available how well both peaks are aligned
        mean_RT_distribution_mean <- mean(mean_RT_distribution,na.rm=T)
        sd_RT_distribution_mean <- mean(sd_RT_distribution,na.rm=T)
        mean_mz_distribution_mean <- mean(mean_mz_distribution,na.rm=T)
        sd_mz_distribution_mean <- mean(sd_mz_distribution,na.rm=T)
        for(s in 1:length(samples))
        {
          temp <- peaks[which(peaks$sample == samples[s]),]
          #add quantification scores
          temp$quant_score <- pval_signal_with_background_quant[match(temp$feature,rownames(pval_signal_with_background_quant)),s]
          #add charge state
          temp$charge <- features$Charge[match(features$Feature_name,temp$feature)]
          #selected peaks RT and mz
          temp_selected <- temp[which(temp$peak == temp$selected),]
          #split features into mono and +1 isotopes
          temp_selected_mono <- temp_selected[which(!grepl("_i",temp_selected$feature)),]
          temp_selected_iso <- temp_selected[which(grepl("_i",temp_selected$feature)),]
          temp_selected_iso$feature <- base::gsub("_i","",temp_selected_iso$feature)
          #combine both and only keep features for which we also have +1 isotope features
          combined <- dplyr::inner_join(temp_selected_mono,temp_selected_iso,by="feature")
          combined$delta_rt_iso_mono <- combined$RT.y-combined$RT.x
          combined$delta_mz_iso_mono <- (((combined$mz.y*combined$charge.y)-1.002054)/combined$charge.y)-combined$mz.x
          #convert to z-score
          combined$z_delta_rt_iso_mono <- (combined$delta_rt_iso_mono-mean_RT_distribution_mean)/sd_RT_distribution_mean
          combined$z_delta_mz_iso_mono <- (combined$delta_mz_iso_mono-mean_mz_distribution_mean)/sd_mz_distribution_mean
          #convert to pvalues
          combined$RT_deviation_pval <- zscore_to_pval(combined$z_delta_rt_iso_mono,"two.sided")
          combined$mz_deviation_pval <- zscore_to_pval(combined$z_delta_mz_iso_mono,"two.sided")
          #store results
          combined$score <- matrixStats::rowMins(as.matrix(combined[,c("RT_deviation_pval","mz_deviation_pval")]),na.rm=T)

          mono_iso_alignment_summary[,s] <- combined$score[match(base::gsub("_i","",rownames(mono_iso_alignment_summary)),combined$feature)]
          colnames(mono_iso_alignment_summary)[s] <- samples[s]
        }

        mono_iso_alignment_summary[is.infinite(as.matrix(mono_iso_alignment_summary))] <- NA

        ###add some plots about for how many features we are certain about peak selection based on detected deviations to isotope peak selection
        if(plot == T)
        {
          temp_alignmentscores <- mono_iso_alignment_summary
          temp_alignmentscores <- temp_alignmentscores[which(!grepl("_d|_i|_d_i",rownames(temp_alignmentscores))),]
          RT_alignment_scores <- rbind(colSums(temp_alignmentscores > mono_iso_alignment_cutoff,na.rm=T),
                                       colSums(temp_alignmentscores < mono_iso_alignment_cutoff,na.rm=T))
          rownames(RT_alignment_scores) <- c("certain","uncertain")

          if(ncol(RT_alignment_scores) <= 15) ###only 15 samples can be plotted in one plot
          {
            p <- Barplotsstacked(RT_alignment_scores,AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(RT_alignment_scores),main="Certainty of peak selection based on mono/+1 isotope peak selection",ylab="Count",ylim=c(0,max(colSums(RT_alignment_scores,na.rm=T))),Legendtitle = "Peak selection")
          }else ###if more samples, split samples up
          {
            pages <- ceiling(ncol(RT_alignment_scores)/15)
            for(p in 1:pages)
            {
              columns <- (((p-1)*15)+1):(p*15)
              columns <- columns[which(columns <= ncol(RT_alignment_scores))]
              p <- Barplotsstacked(RT_alignment_scores[,columns],AvgLine = F,col=c("chocolate","grey"),shownumbers_total = F,shownumbers = T,Legends = rownames(RT_alignment_scores),main="Certainty of peak selection based on mono/+1 isotope peak selection",ylab="Count",ylim=c(0,max(colSums(RT_alignment_scores,na.rm=T))),Legendtitle = "Peak selection")
            }
          }
        }
      }else
      {
        print("Warning: The true peak for too few features is known. Skipping +1-isotope alignment scoring.")
      }

      return(mono_iso_alignment_summary)
    }

    mono_iso_alignment_summary <- Mono_iso_alignment_scoring(features,samples,peaks,pval_signal_with_background_quant,delta_rt,delta_mz,mono_iso_alignment_cutoff)
  }else
  {
    mono_iso_alignment_summary <- NA
  }

  FDR_peak_selection <- Peak_selection_FDR(num_features=500,features,samples,peaks,path_to_features,peak_quant,feature_with_background_intensity,peak_selected,delta_mz,delta_rt,peak_min_ion_count,num_peaks_store,alignment_scores_cutoff,n_cores,peak_decision,Alignment_scoring,plot=T)

  QC_data$FDR_peak_selection <- FDR_peak_selection

  if(any(!is.na(FDR_peak_selection$Large_Intensity_delta_FDR)))
  {
    for(i in 1:length(FDR_peak_selection$Total_FDR))
    {
      print(base::paste(samples[i]," - FDR of peak selection: ",round(FDR_peak_selection$Large_Intensity_delta_FDR[i],digits=1)," %",sep=""))
    }
  }

  grDevices::dev.off()

  print(paste0(Sys.time()," Finished scoring of alignment and quantification"))

  ###function to plot peak selections
  # Plot_feature_quantification <- function(path_to_graph_data,features,selected_feature,samples,peaks,num_peaks_store)
  # {
  #   e <- new.env()
  #
  #   max <- length(samples)
  #   pb <- tcltk::tkProgressBar(title = "Plot peak selections per sample",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
  #   start_time <- Sys.time()
  #   updatecounter <- 0
  #   time_require <- 0
  #
  #   for(s in 1:length(samples))
  #   {
  #     load(base::paste(path_to_graph_data,"\\",samples[s],"_feature_graphs.RData",sep=""),envir=e)
  #
  #     max2 <- length(selected_feature)
  #     pb2 <- tcltk::tkProgressBar(title = "Plot peak selections",label=base::paste( round(0/max2*100, 0),"% done"), min = 0,max = max2, width = 300)
  #     start_time2 <- Sys.time()
  #     updatecounter2 <- 0
  #     time_require2 <- 0
  #     for(f in selected_feature)
  #     {
  #       i <- which(selected_feature == f)
  #       temp <- e$peaks_graph[[as.character(f)]]
  #       if(!is.na(temp))
  #       {
  #         ###plot KDE
  #         image(temp$kdemap,main=base::paste(f,"-",samples[s]),xlab="RT",ylab="m/z",xlim=c(min(temp$kdemap$x),max(temp$kdemap$x)),ylim=c(min(temp$kdemap$y),max(temp$kdemap$y)))
  #         ###indicate where peaks were detected
  #         graphics::text(temp$maxima_select$RT,temp$maxima_select$mz,1:nrow(temp$maxima_select),col=ifelse(temp$maxima_select$known_peak == 1,"green","black"))
  #         ###expected window
  #         sel <- which(features$Feature_name == f)
  #         RT_expected <- features$RT[sel] + features[sel,which(grepl("RT_calibration.",colnames(features)) & grepl(samples[s],colnames(features)))]
  #         RT_window_width <- features$RT_length[sel]
  #         cur_RT_window <- c(RT_expected-(RT_window_width/2),RT_expected+(RT_window_width/2))
  #         mz_expected <- features$m.z[sel] + features[sel,which(grepl("mz_calibration.",colnames(features)) & grepl(samples[s],colnames(features)))]
  #         mz_window <- c(features$m.z_range_min[sel] + features[sel,which(grepl("mz_calibration.",colnames(features)) & grepl(samples[s],colnames(features)))],
  #                        features$m.z_range_max[sel] + features[sel,which(grepl("mz_calibration.",colnames(features)) & grepl(samples[s],colnames(features)))])
  #         delta_mz <- (mz_window[2]-mz_window[1])/2
  #
  #         graphics::rect(cur_RT_window[1],mz_window[1],cur_RT_window[2],mz_window[2],lty=2)
  #         graphics::points(RT_expected,mz_expected,pch=4)
  #         ###indicate which peak was selected
  #         temp_selected <- peaks[which(peaks$sample == samples[s] & peaks$feature == f & peaks$peak == peaks$selected),]
  #
  #         if(temp_selected$peak == (num_peaks_store+1)) #selected standard window
  #         {
  #           graphics::rect(cur_RT_window[1],mz_window[1],cur_RT_window[2],mz_window[2],lty=2,border="blue")
  #           intensity_temp <- temp$peak_intensities[nrow(temp$peak_intensities),1]
  #           graphics::text(RT_expected,mz_expected-(1.1*delta_mz),round(intensity_temp,2),col="blue")
  #         }else
  #         {
  #           rect_pos_x_1 <- temp$maxima_select$RT[temp_selected$peak]-RT_window_width/2
  #           rect_pos_y_1 <- temp$maxima_select$mz[temp_selected$peak]-delta_mz
  #           rect_pos_x_2 <- temp$maxima_select$RT[temp_selected$peak]+RT_window_width/2
  #           rect_pos_y_2 <- temp$maxima_select$mz[temp_selected$peak]+delta_mz
  #
  #
  #           graphics::rect(rect_pos_x_1,rect_pos_y_1,rect_pos_x_2,rect_pos_y_2,lty=2,border="blue")
  #           intensity_temp <- temp$peak_intensities[temp_selected$peak,1]
  #           graphics::text(temp_selected$RT,temp_selected$mz-(1.1*delta_mz),round(intensity_temp,2),col="blue")
  #         }
  #       }
  #
  #       updatecounter2 <- updatecounter2 + 1
  #       if(updatecounter2 >= 1)
  #       {
  #         time_elapsed <- difftime(Sys.time(),start_time2,units="secs")
  #         time_require2 <- (time_elapsed/(i/max2))*(1-(i/max2))
  #         td <- lubridate::seconds_to_period(time_require2)
  #         time_require2 <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))
  #
  #         updatecounter <- 0
  #         tcltk::setTkProgressBar(pb2, i, label=base::paste( round(i/max2*100, 0)," % done (",i,"/",max2,", Time require: ",time_require2,")",sep = ""))
  #       }
  #     }
  #     close(pb2)
  #     updatecounter <- updatecounter + 1
  #     if(updatecounter >= 1)
  #     {
  #       time_elapsed <- difftime(Sys.time(),start_time,units="secs")
  #       time_require <- (time_elapsed/(s/max))*(1-(s/max))
  #       td <- lubridate::seconds_to_period(time_require)
  #       time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))
  #
  #       updatecounter <- 0
  #       tcltk::setTkProgressBar(pb, s, label=base::paste( round(s/max*100, 0)," % done (",s,"/",max,", Time require: ",time_require,")",sep = ""))
  #     }
  #   }
  #   close(pb)
  # }
  #
  # f <- c("Feature_16094","Feature_27778")
  # View(peaks[which(peaks$feature%in%f & peaks$peak == peaks$selected),])
  # features_select[which(features_select$Feature_name == f),]
  # Plot_feature_quantification(path_to_graph_data=base::paste(path_to_mzXML,"\\all_ion_lists\\Extracted feature intensities",output_file_names_add,sep=""),
  #                             features=features,
  #                             selected_feature=f,
  #                             samples=samples[c(13:18)],
  #                             peaks=peaks,
  #                             num_peaks_store=num_peaks_store)


  ###evaluate alignment performance
  #after peak alignment
  peaks_selected <- peaks[which(peaks$peak == peaks$selected),]
  #raw peaks
  peaks_raw <- peaks[which(peaks$peak == 6),]
  select_only_known <- which(base::paste(peaks_raw$sample,peaks_raw$feature) %in% base::paste(peaks_selected$sample,peaks_selected$feature) )
  peaks_raw <- peaks_raw[select_only_known,]

  #deviation in raw data
  temp_mean_raw <- stats::aggregate(peaks_raw[,c("RT","mz","RT_correct","mz_correct")],by=list(peaks_raw$feature),FUN=mean,na.rm=T)
  temp_sd_raw <- stats::aggregate(peaks_raw[,c("RT","mz","RT_correct","mz_correct")],by=list(peaks_raw$feature),FUN=stats::sd,na.rm=T)
  #deviation after alignment
  temp_mean_aligned <- stats::aggregate(peaks_selected[,c("RT","mz","RT_correct","mz_correct")],by=list(peaks_selected$feature),FUN=mean,na.rm=T)
  temp_sd_aligned <- stats::aggregate(peaks_selected[,c("RT","mz","RT_correct","mz_correct")],by=list(peaks_selected$feature),FUN=stats::sd,na.rm=T)
  #exclude features where sd in uncorrected RT > 1 min
  sel <- which(temp_sd_raw$RT > 1)
  temp_mean_raw <- temp_mean_raw[-sel,]
  temp_sd_raw <- temp_sd_raw[-sel,]
  temp_mean_aligned <- temp_mean_aligned[-sel,]
  temp_sd_aligned <- temp_sd_aligned[-sel,]
  grDevices::pdf("Performance of feature alignment.pdf",useDingbats = F)

  tryCatch({
    graphics::boxplot(temp_sd_raw$RT,temp_sd_aligned$RT_correct,outline=F,main="Variability of feature RT",names = c("Raw","Aligned"),ylab="SD of RT [min]")
    graphics::boxplot(temp_sd_raw$mz,temp_sd_aligned$mz_correct,outline=F,main="Variability of feature mz",names = c("Raw","Aligned"),ylab="SD of mz [Da]")
  },
  error=function(cond) {
    warning(paste("Error while plotting performance of feature alignment. Original message \n",cond))
  })

  grDevices::dev.off()


  ###Save temporary results
  print(paste0(Sys.time()," Save quantification results after alignment scoring"))
  temp_results <- list(features=features,
                       features_intensity=features_intensity,
                       Ioncount_feature_sample_matrix=Ioncount_feature_sample_matrix,
                       feature_with_background_intensity=feature_with_background_intensity,
                       Ioncount_feature_with_background_intensity=Ioncount_feature_with_background_intensity,
                       peak_selected=peak_selected,
                       S2B=S2B,
                       pval_signal_with_background_quant=pval_signal_with_background_quant,
                       alignment_variability_score=alignment_variability_score,
                       alignment_scores_peaks_correct=alignment_scores_peaks_correct,
                       alignment_scores_peaks_raw=alignment_scores_peaks_raw,
                       mono_iso_alignment_summary=mono_iso_alignment_summary,
                       peaks=peaks,
                       QC_data)
  setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
  save(temp_results,file = "Quantification_raw_results_with_scores.RData")

  crap <- gc(F)
  ###load temporary results
  # setwd(base::paste(path_to_features,"\\Temporary_files",sep=""))
  # load("Quantification_raw_results_with_scores.RData")
  # features <- temp_results$features
  # features_intensity <- temp_results$features_intensity
  # Ioncount_feature_sample_matrix <- temp_results$Ioncount_feature_sample_matrix
  # feature_with_background_intensity <- temp_results$feature_with_background_intensity
  # feature_with_background_intensity_imputed <- temp_results$feature_with_background_intensity_imputed
  # Ioncount_feature_with_background_intensity <- temp_results$Ioncount_feature_with_background_intensity
  # peak_selected <- temp_results$peak_selected
  # S2B <- temp_results$S2B
  # pval_signal_with_background_quant <- temp_results$pval_signal_with_background_quant
  # alignment_variability_score <- temp_results$alignment_variability_score
  # alignment_scores_peaks_correct <- temp_results$alignment_scores_peaks_correct
  # alignment_scores_peaks_raw <- temp_results$alignment_scores_peaks_raw
  # mono_iso_alignment_summary <- temp_results$mono_iso_alignment_summary
  # peaks <- temp_results$peaks
  ###remove decoy ions from results
  selection <- which(!grepl("_d",features$Feature_name))
  if(length(selection) > 0)
  {
    features<-features[selection,]
    Ioncount_feature_sample_matrix<-Ioncount_feature_sample_matrix[selection,]
    feature_with_background_intensity<-feature_with_background_intensity[selection,]
    Ioncount_feature_with_background_intensity<-Ioncount_feature_with_background_intensity[selection,]
    peak_selected<-peak_selected[selection,]
    S2B<-S2B[selection,]
    pval_signal_with_background_quant<-pval_signal_with_background_quant[selection,]
    alignment_variability_score<-alignment_variability_score[selection,]
    alignment_scores_peaks_correct<-alignment_scores_peaks_correct[selection,]
    alignment_scores_peaks_raw<-alignment_scores_peaks_raw[selection,]
    if(any(!is.na(mono_iso_alignment_summary)))mono_iso_alignment_summary<-mono_iso_alignment_summary[selection,]
  }

  ###remove isotope features which never show significant quantification
  selection <- which(grepl("_i",features$Feature_name) & matrixStats::rowMins(as.matrix(pval_signal_with_background_quant),na.rm=T) > Quant_pVal_cut)

  total_length <- length(which(grepl("_i",features$Feature_name)))
  if(length(selection) > 0)
  {
    features<-features[-selection,]
    Ioncount_feature_sample_matrix<-Ioncount_feature_sample_matrix[-selection,]
    feature_with_background_intensity<-feature_with_background_intensity[-selection,]
    Ioncount_feature_with_background_intensity<-Ioncount_feature_with_background_intensity[-selection,]
    peak_selected<-peak_selected[-selection,]
    S2B<-S2B[-selection,]
    pval_signal_with_background_quant<-pval_signal_with_background_quant[-selection,]
    alignment_variability_score<-alignment_variability_score[-selection,]
    alignment_scores_peaks_correct<-alignment_scores_peaks_correct[-selection,]
    alignment_scores_peaks_raw<-alignment_scores_peaks_raw[-selection,]
    if(any(!is.na(mono_iso_alignment_summary)))mono_iso_alignment_summary<-mono_iso_alignment_summary[-selection,]
    print(base::paste(Sys.time(),"Removed ",length(selection)," isotope features (",round(length(selection)/total_length*100,digits=1)," %) as they dont show significant ion accumulation in any sample.",sep=""))
  }

  ####impute missing values based on generalized additive model
  impute_feature_level_quant <- function(data,features,background_intensity_GAM_table_per_feature_sum)
  {
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    quant_data <- data.table::copy(data)
    background_intensity_GAM_table_per_feature <- background_intensity_GAM_table_per_feature_sum

    for(c in 1:ncol(quant_data))
    {
      missing_vals_rows <- which(is.na(quant_data[,c]))

      impute_vals <- background_intensity_GAM_table_per_feature[match(rownames(quant_data)[missing_vals_rows],rownames(background_intensity_GAM_table_per_feature)),c]

      impute_vals <- log10(2^impute_vals)

      data.table::set(quant_data,i=as.integer(missing_vals_rows),j=as.integer(c),value=impute_vals)
    }

    return(quant_data)
  }

  feature_with_background_intensity_imputed <- impute_feature_level_quant(data=feature_with_background_intensity,
                                                                          features=features,
                                                                          background_intensity_GAM_table_per_feature_sum=QC_data$Decoy_feature_parameters$background_intensity_GAM_table_per_feature_sum)

  ##now perform filtering of quantifications based on scores
  #remove all features with too high variability
  selection <- rownames(alignment_variability_score)[which(matrixStats::rowMins(as.matrix(alignment_variability_score),na.rm=T) < alignment_variability_score_cutoff)]
  if(length(selection)>0)
  {
    print(base::paste(Sys.time(),"Removed ",length(which(!grepl("_i",selection)))," (",length(which(grepl("_i",selection)))," isotope)"," features from quantification results due to too high variability in alignment between samples.",sep=""))

    selection <- match(selection,features$Feature_name)

    features<-features[-selection,]
    Ioncount_feature_sample_matrix<-Ioncount_feature_sample_matrix[-selection,]
    feature_with_background_intensity<-feature_with_background_intensity[-selection,]
    feature_with_background_intensity_imputed<-feature_with_background_intensity_imputed[-selection,]
    Ioncount_feature_with_background_intensity<-Ioncount_feature_with_background_intensity[-selection,]
    peak_selected<-peak_selected[-selection,]
    S2B<-S2B[-selection,]
    pval_signal_with_background_quant<-pval_signal_with_background_quant[-selection,]
    alignment_variability_score<-alignment_variability_score[-selection,]
    alignment_scores_peaks_correct<-alignment_scores_peaks_correct[-selection,]
    alignment_scores_peaks_raw<-alignment_scores_peaks_raw[-selection,]
    if(any(!is.na(mono_iso_alignment_summary)))mono_iso_alignment_summary<-mono_iso_alignment_summary[-selection,]
  }

  #remove all quantifications per sample with uncertain peak selection

  for(s in 1:length(samples))
  {
    temp_scores <- alignment_scores_peaks_correct[,which(grepl(samples[s],colnames(alignment_scores_peaks_correct))),drop=F]
    temp_scores_raw <- alignment_scores_peaks_raw[,which(grepl(samples[s],colnames(alignment_scores_peaks_correct))),drop=F]
    selection <- which(temp_scores < alignment_scores_cutoff)# | temp_scores_raw < alignment_scores_cutoff)
    names(selection) <- rownames(temp_scores)[selection]

    feature_with_background_intensity[selection,s] <- NA
    feature_with_background_intensity_imputed[selection,s] <- NA

    print(base::paste(samples[s],": Removed ",length(which(!grepl("_i",names(selection))))," (",length(which(grepl("_i",names(selection))))," isotope)"," quantifications (",round(length(selection)/nrow(alignment_scores_peaks_correct)*100,digits=1)," %) due to uncertain peak selection",sep=""))
  }

  #remove isotope quantifications for uncertain mono-+1-isos but keep mono quantification if otherwise certain of peak selection
  if(any(!is.na(mono_iso_alignment_summary)))
  {
    for(s in 1:length(samples))
    {
      if(length(which(peaks$known == 1)) >= 10)
      {
        temp_scores <- mono_iso_alignment_summary[,which(grepl(samples[s],colnames(mono_iso_alignment_summary)))]
        selection <- which(temp_scores < mono_iso_alignment_cutoff & grepl("_i",rownames(mono_iso_alignment_summary)))
        names(selection) <- rownames(mono_iso_alignment_summary)[selection]

        #how many of these still have a quantification (not already removed by previous filtering step)
        temp_quant <- feature_with_background_intensity[selection,s]

        feature_with_background_intensity[selection,s] <- NA
        feature_with_background_intensity_imputed[selection,s] <- NA

        print(base::paste(samples[s],": Removed ",length(which(!is.na(temp_quant)))," isotope quantifications (",round(length(which(!is.na(temp_quant)))/nrow(alignment_scores_peaks_correct)*100,digits=1)," %) due to discrepancy in peak selection between mono-/+1-isotope features",sep=""))
      }else ### not enough known peaks ... remove all isotopes
      {
        selection <- which(grepl("_i",rownames(feature_with_background_intensity)))
        feature_with_background_intensity[selection,s] <- NA
        feature_with_background_intensity_imputed[selection,s] <- NA

        print(base::paste(samples[s],": Removed all isotope quantifications due to too few known true peaks.",sep=""))
      }

    }
  }

  #calculate fraction of missing values before and after imputation
  total <- nrow(feature_with_background_intensity)*ncol(feature_with_background_intensity)
  missing <- length(which(is.na(as.numeric(as.matrix(feature_with_background_intensity)))))
  print(base::paste(Sys.time(),"Quantification without imputation: ",round(missing/total*100,digits=1)," % missing values",sep=""))
  missing <- length(which(is.na(as.numeric(as.matrix(feature_with_background_intensity_imputed)))))
  print(base::paste(Sys.time(),"Quantification with imputation: ",round(missing/total*100,digits=1)," % missing values",sep=""))

  ####If features contain PMPs (potentially missed peptides) then evaluate now which of them are correlating (spearman, rank) in abundance to the other peptides of the same proteins
  # if(any(grepl("_pmp",features$Feature_name)))
  # {
  #   cor_p_cut <- 0.05
  #   selection <- which(grepl("_pmp",features$Feature_name))
  #   PMPs <- list(features=features[selection,],
  #                features_intensity=features_intensity[selection,],
  #                Ioncount_feature_sample_matrix=Ioncount_feature_sample_matrix[selection,],
  #                feature_with_background_intensity=feature_with_background_intensity[selection,],
  #                Ioncount_feature_with_background_intensity=Ioncount_feature_with_background_intensity[selection,],
  #                Alignment_scores=Alignment_scores[selection,],
  #                Quant_pvals_signal_with_background=Quant_pvals_signal_with_background[selection,])
  #
  #   ###go through all proteins with PMPs and evaluate if PMPs are correlating with known peptides of the respective protein
  #   remove_list <- vector(mode="numeric",length(PMPs$features$Feature_name))
  #   remove_list[remove_list==0]<-NA
  #   count_remove <- 0
  #
  #   unique_PMP_proteins <- unique(PMPs$features$Protein)
  #
  #   max <- length(unique_PMP_proteins)
  #   pb <- tcltk::tkProgressBar(title = "Detect significantly correlating PMPs",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
  #   start_time <- Sys.time()
  #   updatecounter <- 0
  #   time_require <- 0
  #   pbi <- 0
  #   for(p in unique_PMP_proteins)
  #   {
  #     selection <- which(features$Protein == p & !grepl("_i",features$Feature_name))
  #     temp <- features[selection,]
  #     temp_quant <- feature_with_background_intensity_imputed[selection,]
  #
  #     pmp_features <- which(grepl("_pmp",temp$Feature_name))
  #
  #     correlation <- rcorr(t(temp_quant),type="pearson")
  #
  #     correlation$P[correlation$P==0]<-NA
  #
  #     remove <- vector(mode="logical",length(pmp_features))
  #     for(i in pmp_features)
  #     {
  #       if(!any(correlation$P[i,-pmp_features] < cor_p_cut,na.rm=T))
  #       {
  #         remove[which(pmp_features==i)] <- T
  #       }
  #     }
  #
  #     if(any(remove)) ###should any of the pmp features be removed
  #     {
  #       remove <- selection[pmp_features[which(remove==T)]]
  #       remove_list[(count_remove+1):(count_remove+length(remove))] <- remove
  #       count_remove <- count_remove + length(remove)
  #     }
  #
  #     pbi <- pbi + 1
  #
  #     updatecounter <- updatecounter + 1
  #     if(updatecounter >= 10)
  #     {
  #       time_elapsed <- difftime(Sys.time(),start_time,units="secs")
  #       time_require <- (time_elapsed/(pbi/max))*(1-(pbi/max))
  #       td <- lubridate::seconds_to_period(time_require)
  #       time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))
  #
  #       updatecounter <- 0
  #       tcltk::setTkProgressBar(pb, pbi, label=base::paste( round(pbi/max*100, 0)," % done (",pbi,"/",max,", Time require: ",time_require,")",sep = ""))
  #     }
  #   }
  #   close(pb)
  #
  #   remove_list <- remove_list[1:count_remove]
  #
  #   print(base::paste(nrow(PMPs$features)-length(remove_list),"of PMPs show significant correlation with peptides of respective proteins."))
  #
  #   ###now remove PMPs which are not at all correlating with any peptide of the same protein
  #   if(length(remove_list)>0)
  #   {
  #     ###store excluded features and quant
  #     excluded_PMPs <- list(features=features[remove_list,],
  #                           features_intensity=features_intensity[remove_list,],
  #                           Ioncount_feature_sample_matrix=Ioncount_feature_sample_matrix[remove_list,],
  #                           feature_with_background_intensity=feature_with_background_intensity[remove_list,],
  #                           feature_with_background_intensity_imputed=feature_with_background_intensity_imputed[remove_list,],
  #                           Ioncount_feature_with_background_intensity=Ioncount_feature_with_background_intensity[remove_list,],
  #                           Alignment_scores=Alignment_scores[remove_list,],
  #                           Quant_pvals_signal_with_background=Quant_pvals_signal_with_background[remove_list,])
  #     save(excluded_PMPs,file = "excluded_PMPs.RData")
  #
  #     ###keep all the other features and continue
  #     features<-features[-remove_list,]
  #     features_intensity<-features_intensity[-remove_list,]
  #     Ioncount_feature_sample_matrix<-Ioncount_feature_sample_matrix[-remove_list,]
  #     feature_with_background_intensity<-feature_with_background_intensity[-remove_list,]
  #     feature_with_background_intensity_imputed<-feature_with_background_intensity_imputed[-remove_list,]
  #     Ioncount_feature_with_background_intensity<-Ioncount_feature_with_background_intensity[-remove_list,]
  #     Alignment_scores<-Alignment_scores[-remove_list,]
  #     Quant_pvals_signal_with_background<-Quant_pvals_signal_with_background[-remove_list,]
  #   }
  # }
  #
  # ###Filter for isotope features which at least in one sample show significant quantification
  # if(any(grepl("_i",features$Feature_name)))
  # {
  #   selection <- which(grepl("_i",features$Feature_name))
  #   quant_pvals_temp <- Quant_pvals_signal_with_background[selection,]
  #   remove_list <- which(matrixStats::rowMins(as.matrix(quant_pvals_temp),na.rm=T)>0.01)
  #   remove_list <- selection[remove_list]
  #
  #   if(length(remove_list)>0)
  #   {
  #     features<-features[-remove_list,]
  #     features_intensity<-features_intensity[-remove_list,]
  #     Ioncount_feature_sample_matrix<-Ioncount_feature_sample_matrix[-remove_list,]
  #     feature_with_background_intensity<-feature_with_background_intensity[-remove_list,]
  #     feature_with_background_intensity_imputed<-feature_with_background_intensity_imputed[-remove_list,]
  #     Ioncount_feature_with_background_intensity<-Ioncount_feature_with_background_intensity[-remove_list,]
  #     Alignment_scores<-Alignment_scores[-remove_list,]
  #     Quant_pvals_signal_with_background<-Quant_pvals_signal_with_background[-remove_list,]
  #   }
  # }

  ###store the updated results
  # setwd(base::paste(path_to_features,"\\Temporary_files",sep=""))
  # utils::write.table(features_intensity,base::paste("Features_quantification_signal_only",output_file_names_add,".txt",sep=""))
  # utils::write.table(Ioncount_feature_sample_matrix,base::paste("Features_quantification_ion_counts_signal_only",output_file_names_add,".txt",sep=""))
  # utils::write.table(feature_with_background_intensity,base::paste("Features_quantification_signal_background",output_file_names_add,".txt",sep=""))
  # utils::write.table(feature_with_background_intensity,base::paste("Features_quantification_signal_background_imputed",output_file_names_add,".txt",sep=""))
  # utils::write.table(Ioncount_feature_with_background_intensity,base::paste("Features_quantification_ion_count_signal_background",output_file_names_add,".txt",sep=""))
  # utils::write.table(Alignment_scores,base::paste("Alignment_scores",output_file_names_add,".txt",sep=""))
  # utils::write.table(Quant_pvals_signal_with_background,base::paste("Features_quantification_pvals_signal_background",output_file_names_add,".txt",sep=""))
  #

  ###store results after filtering
  print(paste0(Sys.time()," Save quantification results after alignment scoring and filter"))

  temp_results <- list(features=features,
                       features_intensity=features_intensity,
                       Ioncount_feature_sample_matrix=Ioncount_feature_sample_matrix,
                       feature_with_background_intensity=feature_with_background_intensity,
                       feature_with_background_intensity_imputed=feature_with_background_intensity_imputed,
                       Ioncount_feature_with_background_intensity=Ioncount_feature_with_background_intensity,
                       peak_selected=peak_selected,
                       S2B=S2B,
                       pval_signal_with_background_quant=pval_signal_with_background_quant,
                       alignment_variability_score=alignment_variability_score,
                       alignment_scores_peaks_correct=alignment_scores_peaks_correct,
                       alignment_scores_peaks_raw=alignment_scores_peaks_raw,
                       mono_iso_alignment_summary=mono_iso_alignment_summary,
                       QC_data)
  setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
  save(temp_results,file = "Quantification_raw_results_with_scores_filtered.RData")

  # path_to_features <- "F:\\9_Spike_in_data_sets\\4_spike-in human Shen\\Requant\\19 - DDAicer reprocessed"
  # path_to_MaxQ_output <- "F:\\9_Spike_in_data_sets\\4_spike-in human Shen\\MaxQuant"
  # output_file_names_add <- "_DDAiceR_analysis"
  # n_cores <- 3
  # abundance_estimation_correction <- T
  # calc_protein_LFQ <- T
  # calc_peptide_LFQ <- F
  #
  # setwd(base::paste(path_to_features,"\\Temporary_files",sep=""))
  # load("Quantification_raw_results_with_scores_filtered.RData")
  # features <- temp_results$features
  # features_intensity <- temp_results$features_intensity
  # Ioncount_feature_sample_matrix <- temp_results$Ioncount_feature_sample_matrix
  # feature_with_background_intensity <- temp_results$feature_with_background_intensity
  # feature_with_background_intensity_imputed <- temp_results$feature_with_background_intensity_imputed
  # Ioncount_feature_with_background_intensity <- temp_results$Ioncount_feature_with_background_intensity
  # peak_selected <- temp_results$peak_selected
  # S2B <- temp_results$S2B
  # pval_signal_with_background_quant <- temp_results$pval_signal_with_background_quant
  # alignment_variability_score <- temp_results$alignment_variability_score
  # alignment_scores_peaks_correct <- temp_results$alignment_scores_peaks_correct
  # mono_iso_alignment_summary <- temp_results$mono_iso_alignment_summary
  # QC_data <- temp_results$QC_data
  # library(stringr)
  # library(doParallel)
  ###Perform protein level quantification

  correct_intensities <- function(features,feature_sample_matrix_requantified,pval_quant,MaxQ_peptides_quant,main="",corr_factor=NA)
  {
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    #suppressWarnings(suppressMessages(library(dplyr,quietly = T)))

    if(any(is.na(corr_factor))) ###no correction factor specified so correction will be determined and then applied
    {
      ###select monoisotopic features with significant quantification for determination of general trends in difference between MaxQ and Requant quantification
      select <- which(!grepl("_i|_d",features$Feature_name))
      feature_quant <- feature_sample_matrix_requantified[select,]
      feature_quant_pval <- pval_quant[select,]
      feature_quant[is.na(feature_quant_pval) | feature_quant_pval > 0.1] <- NA
      features_select <- features[select,]
      ###determine deviations between Requant and MaxQ in abundance estimations
      Requant_peptides_quant_seq <- stats::aggregate(10^feature_quant,by=list(Sequence=features_select$Sequence),FUN=sum)
      Requant_peptides_quant_seq[,-1] <- base::log2(Requant_peptides_quant_seq[,-1])
      ###bring MaxQ results table into same order as Requant output
      if(ncol(MaxQ_peptides_quant) != ncol(Requant_peptides_quant_seq))
      {
        colnames(MaxQ_peptides_quant) <- base::gsub("Intensity.","",colnames(MaxQ_peptides_quant))
        ordering <- vector("numeric",ncol(MaxQ_peptides_quant))
        for(i in 1:ncol(MaxQ_peptides_quant))
        {
          overlap <- grepl(base::paste(colnames(MaxQ_peptides_quant)[i],"$",sep=""),colnames(Requant_peptides_quant_seq))
          ordering[i] <- ifelse(any(overlap),which(overlap == T),NA)
        }
        MaxQ_peptides_quant <- MaxQ_peptides_quant[,which(!is.na(ordering))]
        ordering <- ordering[!is.na(ordering)]
        MaxQ_peptides_quant <- MaxQ_peptides_quant[,ordering]
      }
      comb <- dplyr::full_join(MaxQ_peptides_quant,Requant_peptides_quant_seq,by="Sequence")

      ncolumns <- ncol(feature_sample_matrix_requantified)

      dat <- base::as.data.frame(matrix(ncol=2,nrow=nrow(comb)*ncolumns))
      dat[,1] <- as.numeric(dat[,1])
      dat[,2] <- as.numeric(dat[,2])
      for(i in 1:ncolumns)
      {
        start_index <- (i-1)*nrow(comb)+1
        end_index <- i*nrow(comb)
        temp <- base::as.data.frame(cbind(comb[,i+1],comb[,i+1+ncolumns]))
        data.table::set(dat,as.integer(start_index:end_index),j=as.integer(1:2),value=temp)
      }
      colnames(dat) <- c("MaxQ","Requant")
      dat <- stats::na.omit(dat)
      if(main != "")main=base::paste(" ",main,sep="")
      grDevices::pdf(base::paste("Correct feature abundance estimations",main,".pdf",sep=""))
      graphics::smoothScatter(dat[,1],dat[,2],ylab="Requant, log2",xlab="MaxQ, log2",main="Correlation of MaxQ vs Requant on peptide level")
      graphics::abline(a=0,b=1)
      temp <- dat[c(order(dat[,2],decreasing = T)[1:(0.1*nrow(dat))],order(dat[,2],decreasing = F)[1:(0.1*nrow(dat))]),]
      y <- temp[,2]
      x <- temp[,1]
      fit <- stats::lm(y~x)
      graphics::abline(fit,lty=2,col="red")
      m <- summary(fit)$coefficients[2,1]
      intercept <- summary(fit)$coefficients[1,1]
      Rsq <- summary(fit)$r.squared
      posx <- graphics::par("usr")[1]+(graphics::par("usr")[2]-graphics::par("usr")[1])*0.15
      posy <- graphics::par("usr")[4]-(graphics::par("usr")[4]-graphics::par("usr")[3])*0.1
      graphics::text(posx,posy,labels = base::paste("R? =",round(Rsq,digits=2),"\nslope =",round(m,digits=2)))
      grDevices::dev.off()

      ###correction
      features_sample_matrix_corrected <- base::as.data.frame(base::log2(10^feature_sample_matrix_requantified))

      ###correct slope
      correction <- 1/m
      for(c in 1:ncol(features_sample_matrix_corrected))
      {
        features_sample_matrix_corrected[,c] <- (features_sample_matrix_corrected[,c]*correction)
      }
      return(list(features_sample_matrix_corrected,
                  correction_data=dat,
                  correction_fit=fit,
                  correction_factor=correction))
    }else ###correction factor is supplied so correct data accordingly
    {
      ###correction
      features_sample_matrix_corrected <- base::as.data.frame(base::log2(10^feature_sample_matrix_requantified))

      for(c in 1:ncol(features_sample_matrix_corrected))
      {
        features_sample_matrix_corrected[,c] <- features_sample_matrix_corrected[,c]*corr_factor
      }
      return(list(features_sample_matrix_corrected,
                  correction_factor=corr_factor))
    }

  }

  Top3_Protein_Quant <- function(features,feature_sample_matrix_requantified,Alignment_scores=NULL,Quant_pvals=NULL,S2B=NULL,use_overlapping=T,min_peps=2,quant_pvalue_cutoff=0.1,use_isotope_pmps=F)
  {
    pb <- tcltk::tkProgressBar(title = "Prepare Top3 quantification",label=base::paste( round(0/1*100, 0),"% done"), min = 0,max = 1, width = 300)
    close(pb)
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    #suppressWarnings(suppressMessages(library(lubridate,quietly = T)))
    #suppressWarnings(suppressMessages(library(matrixStats,quietly = T)))
    #suppressWarnings(suppressMessages(library(stringr,quietly = T)))

    feature_sample_matrix_requantified <- base::as.data.frame(feature_sample_matrix_requantified)
    if(!is.null(Quant_pvals))Quant_pvals <- base::as.data.frame(Quant_pvals)
    if(!is.null(Alignment_scores))Alignment_scores <- base::as.data.frame(Alignment_scores)
    if(!is.null(S2B))S2B <- base::as.data.frame(S2B)
    ##top3 method
    ###input matrix with samples in cols and rows correspond to unique peptides (log2 summed intensity over charge state and modification) of a respective protein
    Top3_quant <- function(pep_matrix,features_temp,Alignment_scores_temp=NULL,Quant_pvals_temp=NULL,S2B_temp=NULL)
    {
      sequence <- features_temp$Sequence

      if(length(sequence)>0)
      {
        pep_matrix <- stats::aggregate(2^pep_matrix, by=list(Sequence=sequence), FUN=sum,na.rm=T)
        pep_matrix <- pep_matrix[,-1]
        pep_matrix[pep_matrix==0] <- NA
        pep_matrix <- base::log2(pep_matrix)

        if(!is.null(Alignment_scores_temp))
        {
          score_matrix <- stats::aggregate(Alignment_scores_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          score_matrix <- score_matrix[,-1]
          score_matrix[score_matrix==0] <- NA
        }else
        {
          score_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(score_matrix) <- colnames(pep_matrix)
        }
        if(!is.null(Quant_pvals_temp))
        {
          pval_matrix <- stats::aggregate(Quant_pvals_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          pval_matrix <- pval_matrix[,-1]
          pval_matrix[pval_matrix==0] <- NA
        }else
        {
          pval_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(pval_matrix) <- colnames(pep_matrix)
        }
        if(!is.null(S2B_temp))
        {
          S2B_matrix <- stats::aggregate(S2B_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          S2B_matrix <- S2B_matrix[,-1]
          S2B_matrix[S2B_matrix==0] <- NA
        }else
        {
          S2B_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(S2B_matrix) <- colnames(pep_matrix)
        }

        top3_res <- NULL
        top3_score_res <- NULL
        top3_pval_res <- NULL
        top3_S2B_res <- NULL
        for(c in 1:ncol(pep_matrix))
        {
          top3 <- 2^pep_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c][1:3]
          sums <- sum(top3,na.rm=T)

          if(!is.null(Alignment_scores_temp))
          {
            top3_score <- score_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c][1:3]
            median_score <- stats::median(top3_score,na.rm=T)
          }else
          {
            median_score <- NA
          }

          if(!is.null(Quant_pvals_temp))
          {
            top3_pval <- pval_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c][1:3]
            median_pval <- stats::median(top3_pval,na.rm=T)
          }else
          {
            median_pval <- NA
          }

          if(!is.null(S2B_temp))
          {
            top3_S2B <- S2B_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c][1:3]
            median_S2B <- stats::median(top3_S2B,na.rm=T)
          }else
          {
            median_S2B <- NA
          }

          if(sums == 0)
          {
            sums <- NA
          }else
          {
            if(length(which(!is.na(top3)))>=min_peps) ### quantification only if at least 2 peptide quantifications are available
            {
              sums <- sums/length(which(!is.na(top3)))
            }else
            {
              sums <- NA
              median_score <- NA
              median_pval <- NA
              median_S2B <- NA
            }

          }
          top3_res <- append(top3_res,base::log2(sums))
          top3_score_res <- append(top3_score_res,median_score)
          top3_pval_res <- append(top3_pval_res,median_pval)
          top3_S2B_res <- append(top3_S2B_res,median_S2B)
        }
      }else
      {
        top3_res <- rep(NA,ncol(pep_matrix))
        top3_score_res <- rep(NA,ncol(score_matrix))
        top3_pval_res <- rep(NA,ncol(pval_matrix))
        top3_S2B_res <- rep(NA,ncol(pval_matrix))
      }

      names(top3_res) <- colnames(pep_matrix)
      names(top3_score_res) <- base::paste(colnames(score_matrix),"_median_score",sep="")
      names(top3_pval_res) <- base::paste(colnames(pval_matrix),"_median_pvals",sep="")
      names(top3_S2B_res) <- base::paste(colnames(pval_matrix),"_median_S2B",sep="")

      return(append(top3_res,append(top3_score_res,append(top3_pval_res,top3_S2B_res))))
    }

    if(use_isotope_pmps == F)
    {
      features_temp <- features[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      Quant_pvals_temp <- Quant_pvals[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      Alignment_scores_temp <- Alignment_scores[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      S2B_temp <- S2B[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
    }else
    {
      features_temp <- features[which(features$Protein != "" & !grepl(";",features$Sequence)),]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      Quant_pvals_temp <- Quant_pvals[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      Alignment_scores_temp <- Alignment_scores[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      S2B_temp <- S2B[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
    }

    if(any(!is.na(quant_pvalue_cutoff)))
    {
      selection <- which(matrixStats::rowMins(as.matrix(Quant_pvals_temp),na.rm=T) < quant_pvalue_cutoff)

      features_temp <- features[selection,]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[selection,]
      Quant_pvals_temp <- Quant_pvals[selection,]
      Alignment_scores_temp <- Alignment_scores[selection,]
      S2B_temp <- S2B[selection,]
    }

    if(nrow(features_temp) > 0)
    {
      if(use_overlapping==T) ###use also features where a peptide is shared between 2 or more proteins
      {
        unique_proteins <- sort(unique(as.character(stringr::str_split(features_temp$Protein,"\\||;",simplify = T))))
      }else
      {
        unique_proteins <- sort(unique(features_temp$Protein[which(!grepl("\\||;",features_temp$Protein))]))
      }

      if(any(unique_proteins == ""))unique_proteins <- unique_proteins[-which(unique_proteins == "")]

      protein_TOP3 <- base::as.data.frame(matrix(ncol=1+(4*ncol(feature_sample_matrix_requantified)),nrow=length(unique_proteins),0))
      rownames(protein_TOP3) <- unique_proteins
      colnames(protein_TOP3) <- c("num_quant_features",colnames(feature_sample_matrix_requantified),base::paste("median_alignment_score_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_quant_pvals_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_S2B_",colnames(feature_sample_matrix_requantified),sep=""))

      max <- nrow(protein_TOP3)
      pb <- tcltk::tkProgressBar(title = "Perform Top3 quantification",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      start_time <- Sys.time()
      updatecounter <- 0
      time_require <- 0
      for(i in 1:length(unique_proteins))
      {
        if(use_overlapping==T)
        {
          ind <- which(grepl(unique_proteins[i],features_temp$Protein))
        }else
        {
          ind <- which(features_temp$Protein == unique_proteins[i])
        }
        if(length(ind)>0)
        {
          data.table::set(protein_TOP3,as.integer(i),as.integer(1:ncol(protein_TOP3)),value=as.list(c(length(ind),as.numeric(Top3_quant(pep_matrix = feature_sample_matrix_requantified_temp[ind,],features_temp = features_temp[ind,],Alignment_scores_temp = Alignment_scores[ind,],Quant_pvals_temp=Quant_pvals[ind,],S2B_temp=S2B_temp[ind,])))))
        }else
        {
          data.table::set(protein_TOP3,as.integer(i),as.integer(1),value=0)
        }

        updatecounter <- updatecounter + 1
        if(updatecounter >= 10)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
      close(pb)
    }else
    {
      protein_TOP3 <- base::as.data.frame(matrix(ncol=1+(4*ncol(feature_sample_matrix_requantified)),nrow=0,0))
      colnames(protein_TOP3) <- c("num_quant_features",colnames(feature_sample_matrix_requantified),base::paste("median_alignment_score_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_quant_pvals_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_S2B_",colnames(feature_sample_matrix_requantified),sep=""))
    }

    return(protein_TOP3)
  }

  Total_Protein_Quant <- function(features,feature_sample_matrix_requantified,Alignment_scores=NULL,Quant_pvals=NULL,S2B=NULL,use_overlapping=T,min_peps=2,quant_pvalue_cutoff=0.1,use_isotope_pmps=F)
  {
    pb <- tcltk::tkProgressBar(title = "Prepare Total quantification",label=base::paste( round(0/1*100, 0),"% done"), min = 0,max = 1, width = 300)
    close(pb)
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    #suppressWarnings(suppressMessages(library(lubridate,quietly = T)))
    #suppressWarnings(suppressMessages(library(matrixStats,quietly = T)))
    #suppressWarnings(suppressMessages(library(stringr,quietly = T)))

    feature_sample_matrix_requantified <- base::as.data.frame(feature_sample_matrix_requantified)
    if(!is.null(Quant_pvals))Quant_pvals <- base::as.data.frame(Quant_pvals)
    if(!is.null(Alignment_scores))Alignment_scores <- base::as.data.frame(Alignment_scores)
    if(!is.null(S2B))S2B <- base::as.data.frame(S2B)
    ##total method
    ###input matrix with samples in cols and rows correspond to unique peptides (log2 summed intensity over charge state and modification) of a respective protein
    Total_quant <- function(pep_matrix,features_temp,Alignment_scores_temp=NULL,Quant_pvals_temp=NULL,S2B_temp=NULL,Quant_cutoff=4)
    {
      sequence <- features_temp$Sequence

      if(length(sequence)>0)
      {
        pep_matrix <- stats::aggregate(2^pep_matrix, by=list(Sequence=sequence), FUN=sum,na.rm=T)
        pep_matrix <- pep_matrix[,-1]
        pep_matrix[pep_matrix==0] <- NA
        pep_matrix <- base::log2(pep_matrix)

        if(!is.null(Alignment_scores_temp))
        {
          score_matrix <- stats::aggregate(Alignment_scores_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          score_matrix <- score_matrix[,-1]
          score_matrix[score_matrix==0] <- NA
        }else
        {
          score_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(score_matrix) <- colnames(pep_matrix)
        }
        if(!is.null(Quant_pvals_temp))
        {
          pval_matrix <- stats::aggregate(Quant_pvals_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          pval_matrix <- pval_matrix[,-1]
          pval_matrix[pval_matrix==0] <- NA
        }else
        {
          pval_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(pval_matrix) <- colnames(pep_matrix)
        }
        if(!is.null(S2B_temp))
        {
          S2B_matrix <- stats::aggregate(S2B_temp, by=list(Sequence=sequence), FUN=mean,na.rm=T)
          S2B_matrix <- S2B_matrix[,-1]
          S2B_matrix[S2B_matrix==0] <- NA
        }else
        {
          S2B_matrix <- matrix(nrow=nrow(pep_matrix),ncol=ncol(pep_matrix),NA)
          colnames(S2B_matrix) <- colnames(pep_matrix)
        }

        total_res <- NULL
        total_score_res <- NULL
        total_pval_res <- NULL
        total_S2B_res <- NULL
        for(c in 1:ncol(pep_matrix))
        {
          total <- 2^pep_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c]
          sums <- sum(total,na.rm=T)

          if(!is.null(Alignment_scores_temp))
          {
            total_score <- score_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c]
            median_score <- stats::weighted.mean(total_score,total,na.rm=T)#stats::median(total_score,na.rm=T)
          }else
          {
            median_score <- NA
          }

          if(!is.null(Quant_pvals_temp))
          {
            total_pval <- pval_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c]
            median_pval <- stats::weighted.mean(total_pval,total,na.rm=T)#stats::median(total_pval,na.rm=T)
          }else
          {
            median_pval <- NA
          }

          if(!is.null(S2B_temp))
          {
            total_S2B <- S2B_matrix[order(pep_matrix[,c],na.last = T,decreasing = T),c]
            median_S2B <- stats::weighted.mean(total_S2B,total,na.rm=T)#stats::median(total_S2B,na.rm=T)
          }else
          {
            median_S2B <- NA
          }

          if(sums == 0)
          {
            sums <- NA
          }else
          {
            if(length(which(!is.na(total)))>=min_peps) ### quantification only if at least n peptide quantifications are available
            {
              sums <- sums/length(which(!is.na(total)))
            }else
            {
              sums <- NA
              median_score <- NA
              median_pval <- NA
              median_S2B <- NA
            }

          }

          total_res <- append(total_res,base::log2(sums))
          total_score_res <- append(total_score_res,median_score)
          total_pval_res <- append(total_pval_res,median_pval)
          total_S2B_res <- append(total_S2B_res,median_S2B)
        }
      }else
      {
        total_res <- rep(NA,ncol(pep_matrix))
        total_score_res <- rep(NA,ncol(score_matrix))
        total_pval_res <- rep(NA,ncol(pval_matrix))
        total_S2B_res <- rep(NA,ncol(pval_matrix))
      }

      names(total_res) <- colnames(pep_matrix)
      names(total_score_res) <- base::paste(colnames(score_matrix),"_median_score",sep="")
      names(total_pval_res) <- base::paste(colnames(pval_matrix),"_median_pvals",sep="")
      names(total_S2B_res) <- base::paste(colnames(pval_matrix),"_median_S2B",sep="")

      return(append(total_res,append(total_score_res,append(total_pval_res,total_S2B_res))))
    }

    if(use_isotope_pmps == F)
    {
      features_temp <- features[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      Quant_pvals_temp <- Quant_pvals[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      Alignment_scores_temp <- Alignment_scores[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
      S2B_temp <- S2B[which(features$Protein != "" & !grepl(";",features$Sequence) & !grepl("_i|_pmp",features$Feature_name)),]
    }else
    {
      features_temp <- features[which(features$Protein != "" & !grepl(";",features$Sequence)),]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      Quant_pvals_temp <- Quant_pvals[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      Alignment_scores_temp <- Alignment_scores[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
      S2B_temp <- S2B[which(features$Protein != "" & !grepl(";",features$Sequence) ),]
    }

    if(any(!is.na(quant_pvalue_cutoff)))
    {
      selection <- which(matrixStats::rowMins(as.matrix(Quant_pvals_temp),na.rm=T) < quant_pvalue_cutoff)

      features_temp <- features[selection,]
      feature_sample_matrix_requantified_temp <- feature_sample_matrix_requantified[selection,]
      Quant_pvals_temp <- Quant_pvals[selection,]
      Alignment_scores_temp <- Alignment_scores[selection,]
      S2B_temp <- S2B[selection,]
    }

    if(nrow(features_temp) > 0)
    {
      if(use_overlapping==T) ###use also features where a peptide is shared between 2 or more proteins
      {
        unique_proteins <- sort(unique(as.character(stringr::str_split(features_temp$Protein,"\\||;",simplify = T))))
      }else
      {
        unique_proteins <- sort(unique(features_temp$Protein[which(!grepl("\\||;",features_temp$Protein))]))
      }

      if(any(unique_proteins == ""))unique_proteins <- unique_proteins[-which(unique_proteins == "")]

      protein_total <- base::as.data.frame(matrix(ncol=1+(4*ncol(feature_sample_matrix_requantified)),nrow=length(unique_proteins),0))
      rownames(protein_total) <- unique_proteins
      colnames(protein_total) <- c("num_quant_features",colnames(feature_sample_matrix_requantified),base::paste("median_alignment_score_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_quant_pvals_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_S2B_",colnames(feature_sample_matrix_requantified),sep=""))

      max <- nrow(protein_total)
      pb <- tcltk::tkProgressBar(title = "Perform total quantification",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
      start_time <- Sys.time()
      updatecounter <- 0
      time_require <- 0
      for(i in 1:length(unique_proteins))
      {
        if(use_overlapping==T)
        {
          ind <- which(grepl(unique_proteins[i],features_temp$Protein))
        }else
        {
          ind <- which(features_temp$Protein == unique_proteins[i])
        }
        if(length(ind)>0)
        {
          data.table::set(protein_total,as.integer(i),as.integer(1:ncol(protein_total)),value=as.list(c(length(ind),as.numeric(Total_quant(pep_matrix = feature_sample_matrix_requantified_temp[ind,],features_temp = features_temp[ind,],Alignment_scores_temp = Alignment_scores[ind,],Quant_pvals_temp=Quant_pvals[ind,],S2B_temp=S2B_temp[ind,])))))
        }else
        {
          data.table::set(protein_total,as.integer(i),as.integer(1),value=0)
        }

        updatecounter <- updatecounter + 1
        if(updatecounter >= 10)
        {
          time_elapsed <- difftime(Sys.time(),start_time,units="secs")
          time_require <- (time_elapsed/(i/max))*(1-(i/max))
          td <- lubridate::seconds_to_period(time_require)
          time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

          updatecounter <- 0
          tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
        }
      }
      close(pb)
    }else
    {
      protein_total <- base::as.data.frame(matrix(ncol=1+(4*ncol(feature_sample_matrix_requantified)),nrow=0,0))
      colnames(protein_total) <- c("num_quant_features",colnames(feature_sample_matrix_requantified),base::paste("median_alignment_score_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_quant_pvals_",colnames(feature_sample_matrix_requantified),sep=""),base::paste("median_S2B_",colnames(feature_sample_matrix_requantified),sep=""))
    }

    return(protein_total)
  }

  crap <- gc(F)
  ###perform protein level aggregation
  if(!is.null(path_to_MaxQ_output))
  {
    print(paste0(Sys.time()," Perform protein-level aggregation"))

    ###load MaxQ peptide results
    MaxQ_peptides <- utils::read.table(base::paste(path_to_MaxQ_output,"/peptides.txt",sep=""),sep="\t",header=T)
    MaxQ_peptides <- MaxQ_peptides[-which(MaxQ_peptides$Potential.contaminant == "+" | MaxQ_peptides$Reverse == "+"),]
    MaxQ_peptides_quant <- MaxQ_peptides[,which(grepl("Intensity\\.",colnames(MaxQ_peptides)))]
    MaxQ_peptides_quant[MaxQ_peptides_quant==0] <- NA
    MaxQ_peptides_quant <- base::log2(MaxQ_peptides_quant)
    MaxQ_peptides_leading_razor <- base::data.frame(Sequence=MaxQ_peptides$Sequence,Leading_razor=MaxQ_peptides$Leading.razor.protein)
    MaxQ_peptides_leading_razor$Leading_razor <- as.character(MaxQ_peptides_leading_razor$Leading_razor)
    MaxQ_peptides <- base::data.frame(Sequence=MaxQ_peptides$Sequence,MaxQ_peptides_quant)

    MaxQ_protein_groups <- utils::read.table(base::paste(path_to_MaxQ_output,"/proteinGroups.txt",sep=""),sep="\t",header=T)

    ###check if IDs were correctly parsed, if not, try to parse with SwissProt or Trembl
    if(!any(colnames(MaxQ_protein_groups) == "Gene.names") & any(colnames(MaxQ_protein_groups) == "Protein.IDs")) ##not correctly parsed but contains trembl or swissprot fasta headers
    {
      print(paste0(Sys.time()," Fasta file was not correctly parsed during search. Try to base::paste fasta headers ..."))
      if(any(grepl(">sp|>tr",MaxQ_protein_groups$Fasta.headers)))
      {
        print(paste0(Sys.time()," Detected Swiss-Prot and/or TrEMBL fasta headers."))
        ##protein level
        ###parsing was not performed correctly so we have to try to do this here expecting swissprot or trembl fasta headers
        MaxQ_protein_groups$Gene.names <- ""
        MaxQ_protein_groups$Organism <- ""
        MaxQ_protein_groups$Protein.IDs <- as.character(MaxQ_protein_groups$Protein.IDs)

        fasta_headers <- stringr::str_split(base::gsub(">","",MaxQ_protein_groups$Fasta.headers),"\\|",simplify = T)

        ##extract gene name and species information
        GN <- vector("character",nrow(MaxQ_protein_groups))
        #OS <- vector("character",nrow(MaxQ_protein_groups))
        ID <- vector("character",nrow(MaxQ_protein_groups))
        GN_start <- unlist(lapply(base::paste(gregexpr("GN=",MaxQ_protein_groups$Fasta.headers),sep=","), `[[`, 1))
        GN_start <- base::gsub("c\\(|\\)","",GN_start)
        #OS_start <- unlist(lapply(base::paste(gregexpr("OS=",MaxQ_protein_groups$Fasta.headers),sep=","), `[[`, 1))
        #OS_start <- base::gsub("c\\(|\\)","",OS_start)
        for(i in 1:nrow(MaxQ_protein_groups))
        {
          if(GN_start[i] != "-1")
          {
            indices <- as.numeric(unlist(stringr::str_split(GN_start[i],","))) + 3
            stop <- regexpr(" ",substring(MaxQ_protein_groups$Fasta.headers[i],indices,indices+10))
            GN_temp <- substring(MaxQ_protein_groups$Fasta.headers[i],indices,indices+stop-2)
            GN[i] <- base::paste(GN_temp,collapse = ";")
          }
          # if(OS_start[i] != "-1")
          # {
          #   indices <- as.numeric(unlist(stringr::str_split(OS_start[i],","))) + 3
          #   stop <- regexpr("GN=",substring(MaxQ_protein_groups$Fasta.headers[i],indices,indices+100))
          #   OS_temp <- substring(MaxQ_protein_groups$Fasta.headers[i],indices,indices+stop-3)
          #   OS[i] <- base::paste(OS_temp,collapse = ";")
          # }
          header <- fasta_headers[i,c(2,4,6)][which(fasta_headers[i,c(2,4,6)] != "")]
          if(length(header)>0)ID[i] <- base::paste(header,collapse=";")
        }
        CON_REV <- !grepl("^CON_|^REV_",MaxQ_protein_groups$Protein.IDs)
        MaxQ_protein_groups$Protein.IDs <- ifelse(CON_REV,ID, MaxQ_protein_groups$Protein.IDs)
        MaxQ_protein_groups$Gene.names <- ifelse(CON_REV,GN,"")
        #MaxQ_protein_groups$Organism <- ifelse(CON_REV,OS,"")
        MaxQ_protein_groups$Majority.protein.IDs <- MaxQ_protein_groups$Protein.IDs

        ##peptide level
        if(any(grepl("\\|",MaxQ_peptides_leading_razor$Leading_razor)))
        {
          temp <- stringr::str_split(MaxQ_peptides_leading_razor$Leading_razor,"\\|",simplify = T)
          MaxQ_peptides_leading_razor$Leading_razor <- ifelse(temp[,2] != "",temp[,2],MaxQ_peptides_leading_razor$Leading_razor)
        }

        ##requantification features
        temp <- stringr::str_split(features$Protein,"\\||;",simplify=T)
        ID <- vector("character",nrow(temp))
        for(i in 1:nrow(temp))
        {
          sel <- which(temp[i,] == "sp")+1
          if(length(sel)>0)
          {
            ID[i] <- base::paste(temp[i,sel],collapse=";")
          }else
          {
            sel <- which(temp[i,] != "")
            ID[i] <- base::paste(temp[i,sel],collapse=";")
          }
        }
        features$Protein <- ID
      }else
      {
        print(paste0(Sys.time()," No supported fasta headers were detected. Next steps might be not fully working."))
      }
    }

    #####Correct for overestimation of high intense features
    if(abundance_estimation_correction == F) ###no correction
    {
      feature_with_background_intensity <- base::log2(10^feature_with_background_intensity)
      feature_with_background_intensity_imputed <- base::log2(10^feature_with_background_intensity_imputed)

    }else ##correct abundance estimations based on MaxQ peptide abundance estimations
    {
      ###determine abundance correction factors based on MaxQ peptide intensities and data without imputation
      cor_res <- correct_intensities(features,feature_sample_matrix_requantified = feature_with_background_intensity,pval_quant = pval_signal_with_background_quant,MaxQ_peptides_quant = MaxQ_peptides,main="Signal_Background_intensity")

      feature_with_background_intensity <- cor_res[[1]]
      QC_data[["Abundance_correction"]] <- list(correction_data=cor_res[[2]],
                                                correction_fit=cor_res[[3]],
                                                correction_factor=cor_res[[4]])

      cor_res <- correct_intensities(features,feature_sample_matrix_requantified = feature_with_background_intensity_imputed,pval_quant = pval_signal_with_background_quant,MaxQ_peptides_quant = MaxQ_peptides,main="Signal_Background_intensity_imputed",corr_factor = cor_res[[4]])

      feature_with_background_intensity_imputed <- cor_res[[1]]
    }

    ###get leading razor ID per peptide sequence
    features$Protein <- as.character(features$Protein)
    features$all_matching_Proteins <- features$Protein
    features$Protein <- as.character(MaxQ_peptides_leading_razor$Leading_razor[match(features$Sequence,MaxQ_peptides_leading_razor$Sequence)])
    features$Protein[is.na(features$Protein)] <- features$all_matching_Proteins[is.na(features$Protein)]

    ###perform peptide level LFQ
    if(calc_peptide_LFQ == T)
    {
      LFQ_peptide_quant_process <- function(features,features_quant,n_cores,label="Perform peptide-LFQ-quantification",num_ratio_samples=NA,TopN=5,seed=1)
      {
        set.seed(seed)
        ##prepare for MaxLFQ algorithm
        #suppressWarnings(suppressMessages(library(doSNOW,quietly = T)))
        #suppressWarnings(suppressMessages(library(data.table,quietly = T)))

        calculate_LFQ <- function(peptide_quant_data,min_num_ratios=2,num_ratio_samples=NA)
        {
          #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
          #suppressWarnings(suppressMessages(library(robustbase,quietly = T)))
          #error function which is used to optimize ratios
          least_square_error <- function(par,ratio_mat)
          {
            sum <- 0
            vals <- NULL
            if(ncol(ratio_mat)>1)
            {
              for(c in 1:(ncol(ratio_mat)-1))
              {
                for(r in (c+1):nrow(ratio_mat))
                {
                  val <- (base::log2(ratio_mat[r,c])-base::log2(par[r])+base::log2(par[c]))^2
                  vals <- append(vals,val)
                  if(!is.na(val)) sum <- sum + val
                }
              }
            }

            if(sum == 0){sum = NA}
            return(sum)
          }
          #unlog intensities
          peptide_quant_data <- 2^peptide_quant_data
          #calculate summed intensities per sample
          totalsum_per_sample <- colSums(peptide_quant_data,na.rm=T)
          #determine median ratio matrix between all samples
          ratio_mat <- base::as.data.frame(matrix(nrow=ncol(peptide_quant_data),ncol=ncol(peptide_quant_data)))
          if(ncol(ratio_mat)>1)
          {
            for(c in 1:(ncol(ratio_mat)-1))
            {
              for(r in (c+1):nrow(ratio_mat))
              {
                ratios <- peptide_quant_data[,r]/peptide_quant_data[,c]
                if(length(which(!is.na(ratios))) >= min_num_ratios){ratio_mat[r,c] <- stats::median(ratios,na.rm=T)}
              }
            }
          }
          if(any(is.na(num_ratio_samples))) ###calculate ratios over all samples
          {
            #define start parameter
            start_par <- c(rep(1,ncol(ratio_mat)))
            #now find optimum in ratios to best recover true observed ratios between samples
            res_ratio <- NULL
            try(res_ratio <- stats::optim(par=start_par, fn=least_square_error, ratio_mat=ratio_mat,lower = 1,upper=100,method = "L-BFGS-B"),silent = T)
            if(!is.null(res_ratio))
            {
              #normalize ratios to sample with highest intensity
              ratio_norm <- res_ratio$par / res_ratio$par[which(totalsum_per_sample == max(totalsum_per_sample))]
              #finally calculate log2 lfq protein intensities per sample
              lfq <- base::log2(ratio_norm*totalsum_per_sample[which(totalsum_per_sample == max(totalsum_per_sample))])
              #remove quant values for samples were no ratios were available
              if(any(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
              {
                sel <- as.numeric(which(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
                lfq[sel] <- NA
              }
            }else
            {
              lfq <- rep(NA,ncol(peptide_quant_data))
            }

          }else ###calculate ratios only for a subset of samples
          {
            lfq <- base::as.data.frame(matrix(nrow=ncol(peptide_quant_data),ncol=ncol(peptide_quant_data),0))
            for(s in 1:ncol(peptide_quant_data))
            {
              ###randomly select up to 6 other samples from list of samples which also contain quantifications
              samples_with_quant <- as.numeric(which(colSums(peptide_quant_data,na.rm=T) > 0))
              samples_with_quant <- samples_with_quant[which(samples_with_quant != s)]
              if(length(samples_with_quant)>0)
              {
                samples_for_comparison_per_sample <- sample(samples_with_quant,ifelse(length(samples_with_quant)>num_ratio_samples,num_ratio_samples,length(samples_with_quant)))
              }else
              {
                samples_for_comparison_per_sample <- sample(c(1:ncol(peptide_quant_data))[-i],size = num_ratio_samples,replace = F)
              }

              ratio_mat_temp <- ratio_mat[c(s,sort(samples_for_comparison_per_sample)),c(s,sort(samples_for_comparison_per_sample))]
              if(length(which(!is.na(ratio_mat_temp))) < 3 & any(!is.na(peptide_quant_data[,s])))##to few of selected random samples show an observed intensity ratio but protein is quantified in current sample
              {
                ###randomly select other samples

              }

              totalsum_per_sample_temp <- totalsum_per_sample[c(s,sort(samples_for_comparison_per_sample))]
              #define start parameter
              start_par <- c(rep(1,ncol(ratio_mat_temp)))
              #now find optimum in ratios to best recover true observed ratios between samples
              res_ratio <- NULL
              try(res_ratio <- stats::optim(par=start_par, fn=least_square_error, ratio_mat=ratio_mat_temp,lower = 1,upper=100,method = "L-BFGS-B"),silent = T)

              if(!is.null(res_ratio))
              {
                #normalize ratios to sample with highest intensity
                ratio_norm <- res_ratio$par / res_ratio$par[which(totalsum_per_sample_temp == max(totalsum_per_sample_temp,na.rm=T))]
                #finally calculate log2 lfq protein intensities per sample
                temp_lfq <- base::log2(ratio_norm*totalsum_per_sample_temp[which(totalsum_per_sample_temp == max(totalsum_per_sample_temp))])

                data.table::set(lfq,as.integer(s),as.integer(c(s,sort(samples_for_comparison_per_sample))),as.list(temp_lfq))
              }

            }
            lfq[lfq==0] <- NA
            lfq <- matrixStats::colMedians(as.matrix(lfq),na.rm=T)

            #remove quant values for samples were no ratios were available
            if(any(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
            {
              sel <- as.numeric(which(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
              lfq[sel] <- NA
            }


          }
          return(lfq)
        }

        ###txtProgressBar from package pbarETA (Francesco Napolitano) - License: LGPL-3
        txtProgressBar <- function (min = 0, max = 1, initial = 0, char = "=", width = NA,
                                    title, label, style = 3, file = "")
        {
          formatTime <- function(seconds) {
            if (seconds == Inf || is.nan(seconds) || is.na(seconds))
              return("NA")
            seconds <- round(seconds)
            sXmin <- 60
            sXhr <- sXmin * 60
            sXday <- sXhr * 24
            sXweek <- sXday * 7
            sXmonth <- sXweek * 4.22
            sXyear <- sXmonth * 12
            years <- floor(seconds/sXyear)
            seconds <- seconds - years * sXyear
            months <- floor(seconds/sXmonth)
            seconds <- seconds - months * sXmonth
            weeks <- floor(seconds/sXweek)
            seconds <- seconds - weeks * sXweek
            days <- floor(seconds/sXday)
            seconds <- seconds - days * sXday
            hours <- floor(seconds/sXhr)
            seconds <- seconds - hours * sXhr
            minutes <- floor(seconds/sXmin)
            seconds <- seconds - minutes * sXmin
            ETA <- c(years, months, days, hours, minutes, seconds)
            startst <- which(ETA > 0)[1]
            if (is.na(startst))
              startst <- 6
            starts <- min(startst, 4)
            fmtstr <- rep("%02d", length(ETA))[startst:length(ETA)]
            fmtstr <- base::paste(fmtstr, collapse = ":")
            return(do.call(sprintf, as.list(c(as.list(fmtstr), ETA[startst:length(ETA)]))))
          }
          if (!identical(file, "") && !(inherits(file, "connection") &&
                                        isOpen(file)))
            stop("'file' must be \"\" or an open connection object")
          if (!style %in% 1L:3L)
            style <- 1
          .val <- initial
          .killed <- FALSE
          .nb <- 0L
          .pc <- -1L
          .time0 <- NA
          .timenow <- NA
          .firstUpdate <- T
          nw <- nchar(char, "w")
          if (is.na(width)) {
            width <- getOption("width")
            if (style == 3L)
              width <- width - 10L
            width <- trunc(width/nw)
          }
          if (max <= min)
            stop("must have 'max' > 'min'")
          up1 <- function(value) {
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            if (.nb < nb) {
              cat(base::paste(rep.int(char, nb - .nb), collapse = ""),
                  file = file)
              utils::flush.console()
            }
            else if (.nb > nb) {
              cat("\r", base::paste(rep.int(" ", .nb * nw), collapse = ""),
                  "\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            .nb <<- nb
          }
          up2 <- function(value) {
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            if (.nb <= nb) {
              cat("\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            else {
              cat("\r", base::paste(rep.int(" ", .nb * nw), collapse = ""),
                  "\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            .nb <<- nb
          }
          up3 <- function(value, calledOnCreation = F) {
            timenow <- proc.time()[["elapsed"]]
            if (!calledOnCreation && .firstUpdate) {
              .time0 <<- timenow
              .timenow <<- timenow
              .firstUpdate <<- F
            }
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            pc <- round(100 * (value - min)/(max - min))
            if (nb == .nb && pc == .pc && timenow - .timenow < 1)
              return()
            .timenow <<- timenow
            span <- timenow - .time0
            timeXiter <- span/(.val - min)
            ETA <- (max - .val) * timeXiter
            ETAstr <- formatTime(ETA)
            cat(base::paste(c("\r  |", rep.int(" ", nw * width + 6)),
                      collapse = ""), file = file)
            cat(base::paste(c("\r  |", rep.int(char, nb), rep.int(" ",
                                                            nw * (width - nb)), sprintf("| %3d%%", pc), ", ETA ",
                        ETAstr), collapse = ""), file = file)
            utils::flush.console()
            .nb <<- nb
            .pc <<- pc
          }
          getVal <- function() .val
          kill <- function() if (!.killed) {
            cat("\n", file = file)
            utils::flush.console()
            .killed <<- TRUE
          }
          up <- switch(style, up1, up2, up3)
          up(initial, T)
          structure(list(getVal = getVal, up = up, kill = kill), class = "txtProgressBar")
        }

        sel <- which(!grepl(";|,",features$Sequence))
        unique_peptides <- sort(unique(base::paste(features$Sequence[sel],features$Modifications[sel],sep="_")))
        unique_seq <- base::substr(unique_peptides,1,regexpr("_",unique_peptides)-1)
        unique_mod <- base::substr(unique_peptides,regexpr("_",unique_peptides)+1,nchar(unique_peptides))

        LFQ_peptide_quant <- base::as.data.frame(matrix(ncol=ncol(features_quant),nrow=length(unique_peptides),0))
        colnames(LFQ_peptide_quant) <- colnames(features_quant)

        ###Perform LFQ quantification
        cl <- snow::makeCluster(n_cores)#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
        doSNOW::registerDoSNOW(cl)
        iterations <- nrow(LFQ_peptide_quant)
        progress <- function(n) utils::setTxtProgressBar(pb, n)
        opts <- list(progress = progress)
        start <- Sys.time()
        print(base::paste(label," (",Sys.time(),")",sep=""))
        pb <- txtProgressBar(max = iterations, style = 3)
        res_LFQ <- foreach::foreach(i=1:nrow(LFQ_peptide_quant),.options.snow = opts) %dopar%
          {
            sel <- which(features$Sequence == unique_seq[i] & features$Modifications == unique_mod[i])
            testdat = features_quant[sel,]
            if(nrow(testdat) > TopN) ###if more than 5 peptides are available select TopN peptides according to intensity over all samples (select highest abundant peptides as quantifications are more accurate)
            {
              total_abundance <- rowSums(testdat,na.rm=T)
              testdat <- testdat[order(total_abundance,decreasing = T)[1:TopN],]
            }
            if(nrow(testdat)>1)
            {
              res <- calculate_LFQ(peptide_quant_data = testdat,min_num_ratios = 1,num_ratio_samples=num_ratio_samples)
            }else
            {
              res <- as.numeric(testdat)
              names(res) <- base::paste("V",1:length(res),sep="")
            }
            return(res)
          }
        snow::stopCluster(cl)
        close(pb)

        #Combine results
        for(i in 1:length(res_LFQ))
        {
          if(length(res_LFQ[[i]])>0)
          {
            data.table::set(LFQ_peptide_quant,as.integer(i),as.integer(1:ncol(LFQ_peptide_quant)),as.list(as.numeric(res_LFQ[[i]])))
          }
        }

        #add information about number of quant features per protein
        count_quant_features <- plyr::count(sort(base::paste(features$Sequence[sel],features$Modifications[sel],sep="_")))

        LFQ_peptide_quant <- base::data.frame(Sequence=unique_seq,
                                        Modifications=unique_mod,
                                        Protein=features$Protein[match(unique_seq,features$Sequence)],
                                        num_quant_features=count_quant_features$freq[match(unique_peptides,count_quant_features$x)],
                                        LFQ_peptide_quant)

        colnames(LFQ_peptide_quant) <- base::gsub("^X","",colnames(LFQ_peptide_quant))

        end <- Sys.time()
        print(base::paste("Finished peptide LFQ-quantification (",Sys.time(),")",sep=""))
        print(end - start)
        #replace 0 by NA
        LFQ_peptide_quant[LFQ_peptide_quant==0] <- NA

        return(LFQ_peptide_quant)
      }

      if(ncol(feature_with_background_intensity) <= 10)num_ratio_samples <- NA
      if(ncol(feature_with_background_intensity) > 10)num_ratio_samples <- 6

      LFQ_peptide_quant_with_background <- LFQ_peptide_quant_process(features,feature_with_background_intensity,n_cores,label="Perform peptide LFQ-quantification for non-imputed data",num_ratio_samples = num_ratio_samples)
      LFQ_peptide_quant_with_background_imputed <- LFQ_peptide_quant_process(features,feature_with_background_intensity_imputed,n_cores,label="Perform peptide LFQ-quantification for imputed data",num_ratio_samples = num_ratio_samples)
      save(LFQ_peptide_quant_with_background,LFQ_peptide_quant_with_background_imputed,file = "Peptide_LFQ_temp.RData")
    }

    ###perform protein level quantification
    if(calc_protein_LFQ == T)
    {
      ##implementation of the MaxLFQ algorithm. num_ratio_samples indicates between how many samples the ratio matrices should be determined.
      #if num_ratio_samples is set to NA it will perform least-square analysis between all samples
      #if num_ratio_samples is set to a number < number of samples least square analysis is performed for randomly picked n (=num_ratio_samples) samples to reduced computation time
      LFQ_protein_quant_process <- function(features,features_quant,n_cores,label="Perform LFQ-quantification",num_ratio_samples=NA,TopN=5,seed=1)
      {
        set.seed(seed)
        ##prepare for MaxLFQ algorithm
        #suppressWarnings(suppressMessages(library(doSNOW,quietly = T)))
        #suppressWarnings(suppressMessages(library(data.table,quietly = T)))

        calculate_LFQ <- function(peptide_quant_data,min_num_ratios=2,num_ratio_samples=NA)
        {
          #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
          #suppressWarnings(suppressMessages(library(robustbase,quietly = T)))
          #error function which is used to optimize ratios
          least_square_error <- function(par,ratio_mat)
          {
            sum <- 0
            vals <- NULL
            if(ncol(ratio_mat)>1)
            {
              for(c in 1:(ncol(ratio_mat)-1))
              {
                for(r in (c+1):nrow(ratio_mat))
                {
                  val <- (base::log2(ratio_mat[r,c])-base::log2(par[r])+base::log2(par[c]))^2
                  vals <- append(vals,val)
                  if(!is.na(val)) sum <- sum + val
                }
              }
            }

            if(sum == 0){sum = NA}
            return(sum)
          }
          #unlog intensities
          peptide_quant_data <- 2^peptide_quant_data
          #calculate summed intensities per sample
          totalsum_per_sample <- colSums(peptide_quant_data,na.rm=T)
          #determine median ratio matrix between all samples
          ratio_mat <- base::as.data.frame(matrix(nrow=ncol(peptide_quant_data),ncol=ncol(peptide_quant_data)))
          if(ncol(ratio_mat)>1)
          {
            for(c in 1:(ncol(ratio_mat)-1))
            {
              for(r in (c+1):nrow(ratio_mat))
              {
                ratios <- peptide_quant_data[,r]/peptide_quant_data[,c]
                if(length(which(!is.na(ratios))) >= min_num_ratios){ratio_mat[r,c] <- stats::median(ratios,na.rm=T)}
              }
            }
          }
          if(any(is.na(num_ratio_samples))) ###calculate ratios over all samples
          {
            #define start parameter
            start_par <- c(rep(1,ncol(ratio_mat)))
            #now find optimum in ratios to best recover true observed ratios between samples
            res_ratio <- NULL
            try(res_ratio <- stats::optim(par=start_par, fn=least_square_error, ratio_mat=ratio_mat,lower = 1,upper=100,method = "L-BFGS-B"),silent = T)
            if(!is.null(res_ratio))
            {
              #normalize ratios to sample with highest intensity
              ratio_norm <- res_ratio$par / res_ratio$par[which(totalsum_per_sample == max(totalsum_per_sample))]
              #finally calculate log2 lfq protein intensities per sample
              lfq <- base::log2(ratio_norm*totalsum_per_sample[which(totalsum_per_sample == max(totalsum_per_sample))])
              #remove quant values for samples were no ratios were available
              if(any(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
              {
                sel <- as.numeric(which(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
                lfq[sel] <- NA
              }
            }else
            {
              lfq <- rep(NA,ncol(peptide_quant_data))
            }

          }else ###calculate ratios only for a subset of samples
          {
            lfq <- base::as.data.frame(matrix(nrow=ncol(peptide_quant_data),ncol=ncol(peptide_quant_data),0))
            for(s in 1:ncol(peptide_quant_data))
            {
              ###randomly select up to 6 other samples from list of samples which also contain quantifications
              samples_with_quant <- as.numeric(which(colSums(peptide_quant_data,na.rm=T) > 0))
              samples_with_quant <- samples_with_quant[which(samples_with_quant != s)]
              if(length(samples_with_quant)>0)
              {
                samples_for_comparison_per_sample <- sample(samples_with_quant,ifelse(length(samples_with_quant)>num_ratio_samples,num_ratio_samples,length(samples_with_quant)))
              }else
              {
                samples_for_comparison_per_sample <- sample(c(1:ncol(peptide_quant_data))[-i],size = num_ratio_samples,replace = F)
              }

              ratio_mat_temp <- ratio_mat[c(s,sort(samples_for_comparison_per_sample)),c(s,sort(samples_for_comparison_per_sample))]
              if(length(which(!is.na(ratio_mat_temp))) < 3 & any(!is.na(peptide_quant_data[,s])))##to few of selected random samples show an observed intensity ratio but protein is quantified in current sample
              {
                ###randomly select other samples

              }

              totalsum_per_sample_temp <- totalsum_per_sample[c(s,sort(samples_for_comparison_per_sample))]
              #define start parameter
              start_par <- c(rep(1,ncol(ratio_mat_temp)))
              #now find optimum in ratios to best recover true observed ratios between samples
              res_ratio <- NULL
              try(res_ratio <- stats::optim(par=start_par, fn=least_square_error, ratio_mat=ratio_mat_temp,lower = 1,upper=100,method = "L-BFGS-B"),silent = T)

              if(!is.null(res_ratio))
              {
                #normalize ratios to sample with highest intensity
                ratio_norm <- res_ratio$par / res_ratio$par[which(totalsum_per_sample_temp == max(totalsum_per_sample_temp,na.rm=T))]
                #finally calculate log2 lfq protein intensities per sample
                temp_lfq <- base::log2(ratio_norm*totalsum_per_sample_temp[which(totalsum_per_sample_temp == max(totalsum_per_sample_temp))])

                data.table::set(lfq,as.integer(s),as.integer(c(s,sort(samples_for_comparison_per_sample))),as.list(temp_lfq))
              }

            }
            lfq[lfq==0] <- NA
            lfq <- matrixStats::colMedians(as.matrix(lfq),na.rm=T)

            #remove quant values for samples were no ratios were available
            if(any(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
            {
              sel <- as.numeric(which(colSums(!is.na(ratio_mat)) == 0 & rowSums(!is.na(ratio_mat)) == 0))
              lfq[sel] <- NA
            }


          }
          return(lfq)
        }

        ###txtProgressBar from package pbarETA (Francesco Napolitano) - License: LGPL-3
        txtProgressBar <- function (min = 0, max = 1, initial = 0, char = "=", width = NA,
                                    title, label, style = 3, file = "")
        {
          formatTime <- function(seconds) {
            if (seconds == Inf || is.nan(seconds) || is.na(seconds))
              return("NA")
            seconds <- round(seconds)
            sXmin <- 60
            sXhr <- sXmin * 60
            sXday <- sXhr * 24
            sXweek <- sXday * 7
            sXmonth <- sXweek * 4.22
            sXyear <- sXmonth * 12
            years <- floor(seconds/sXyear)
            seconds <- seconds - years * sXyear
            months <- floor(seconds/sXmonth)
            seconds <- seconds - months * sXmonth
            weeks <- floor(seconds/sXweek)
            seconds <- seconds - weeks * sXweek
            days <- floor(seconds/sXday)
            seconds <- seconds - days * sXday
            hours <- floor(seconds/sXhr)
            seconds <- seconds - hours * sXhr
            minutes <- floor(seconds/sXmin)
            seconds <- seconds - minutes * sXmin
            ETA <- c(years, months, days, hours, minutes, seconds)
            startst <- which(ETA > 0)[1]
            if (is.na(startst))
              startst <- 6
            starts <- min(startst, 4)
            fmtstr <- rep("%02d", length(ETA))[startst:length(ETA)]
            fmtstr <- base::paste(fmtstr, collapse = ":")
            return(do.call(sprintf, as.list(c(as.list(fmtstr), ETA[startst:length(ETA)]))))
          }
          if (!identical(file, "") && !(inherits(file, "connection") &&
                                        isOpen(file)))
            stop("'file' must be \"\" or an open connection object")
          if (!style %in% 1L:3L)
            style <- 1
          .val <- initial
          .killed <- FALSE
          .nb <- 0L
          .pc <- -1L
          .time0 <- NA
          .timenow <- NA
          .firstUpdate <- T
          nw <- nchar(char, "w")
          if (is.na(width)) {
            width <- getOption("width")
            if (style == 3L)
              width <- width - 10L
            width <- trunc(width/nw)
          }
          if (max <= min)
            stop("must have 'max' > 'min'")
          up1 <- function(value) {
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            if (.nb < nb) {
              cat(base::paste(rep.int(char, nb - .nb), collapse = ""),
                  file = file)
              utils::flush.console()
            }
            else if (.nb > nb) {
              cat("\r", base::paste(rep.int(" ", .nb * nw), collapse = ""),
                  "\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            .nb <<- nb
          }
          up2 <- function(value) {
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            if (.nb <= nb) {
              cat("\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            else {
              cat("\r", base::paste(rep.int(" ", .nb * nw), collapse = ""),
                  "\r", base::paste(rep.int(char, nb), collapse = ""),
                  sep = "", file = file)
              utils::flush.console()
            }
            .nb <<- nb
          }
          up3 <- function(value, calledOnCreation = F) {
            timenow <- proc.time()[["elapsed"]]
            if (!calledOnCreation && .firstUpdate) {
              .time0 <<- timenow
              .timenow <<- timenow
              .firstUpdate <<- F
            }
            if (!is.finite(value) || value < min || value > max)
              return()
            .val <<- value
            nb <- round(width * (value - min)/(max - min))
            pc <- round(100 * (value - min)/(max - min))
            if (nb == .nb && pc == .pc && timenow - .timenow < 1)
              return()
            .timenow <<- timenow
            span <- timenow - .time0
            timeXiter <- span/(.val - min)
            ETA <- (max - .val) * timeXiter
            ETAstr <- formatTime(ETA)
            cat(base::paste(c("\r  |", rep.int(" ", nw * width + 6)),
                      collapse = ""), file = file)
            cat(base::paste(c("\r  |", rep.int(char, nb), rep.int(" ",
                                                            nw * (width - nb)), sprintf("| %3d%%", pc), ", ETA ",
                        ETAstr), collapse = ""), file = file)
            utils::flush.console()
            .nb <<- nb
            .pc <<- pc
          }
          getVal <- function() .val
          kill <- function() if (!.killed) {
            cat("\n", file = file)
            utils::flush.console()
            .killed <<- TRUE
          }
          up <- switch(style, up1, up2, up3)
          up(initial, T)
          structure(list(getVal = getVal, up = up, kill = kill), class = "txtProgressBar")
        }

        unique_proteins <- sort(unique(features$Protein[which(!grepl(";|,",features$Protein))]))
        unique_proteins <- unique_proteins[which(unique_proteins != "")]

        LFQ_protein_quant <- base::as.data.frame(matrix(ncol=ncol(features_quant),nrow=length(unique_proteins),0))
        colnames(LFQ_protein_quant) <- colnames(features_quant)
        rownames(LFQ_protein_quant) <- unique_proteins

        ###Perform LFQ quantification
        cl <- snow::makeCluster(n_cores)#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
        doSNOW::registerDoSNOW(cl)
        iterations <- nrow(LFQ_protein_quant)
        progress <- function(n) utils::setTxtProgressBar(pb, n)
        opts <- list(progress = progress)
        start <- Sys.time()
        print(base::paste(label," (",Sys.time(),")",sep=""))
        pb <- txtProgressBar(max = iterations, style = 3)
        res_LFQ <- foreach::foreach(i=1:nrow(LFQ_protein_quant),.options.snow = opts) %dopar%
          {
            sel <- which(features$Protein == unique_proteins[i])
            testdat = features_quant[sel,]
            if(nrow(testdat) > TopN) ###if more than 5 peptides are available select TopN peptides according to intensity over all samples (select highest abundant peptides as quantifications are more accurate)
            {
              total_abundance <- rowSums(testdat,na.rm=T)
              testdat <- testdat[order(total_abundance,decreasing = T)[1:TopN],]
            }
            if(nrow(testdat) >= 2)
            {
              res <- calculate_LFQ(peptide_quant_data = testdat,min_num_ratios = 2,num_ratio_samples=num_ratio_samples)
            }else
            {
              res <- as.numeric(rep(NA,ncol(features_quant)))
              names(res) <- base::paste("V",1:length(res),sep="")
            }

            return(res)
          }
        snow::stopCluster(cl)
        close(pb)

        #Combine results
        for(i in 1:length(res_LFQ))
        {
          if(length(res_LFQ[[i]])>0)
          {
            data.table::set(LFQ_protein_quant,as.integer(i),as.integer(1:ncol(LFQ_protein_quant)),as.list(as.numeric(res_LFQ[[i]])))
          }
        }
        #add information about number of quant features per protein
        count_quant_features <- plyr::count(features$Protein[which(!grepl(";|\\||,",features$Protein))])

        LFQ_protein_quant <- base::data.frame(num_quant_features=count_quant_features$freq[match(rownames(LFQ_protein_quant),count_quant_features$x)],LFQ_protein_quant)

        end <- Sys.time()
        print(base::paste("Finished LFQ-quantification (",Sys.time(),")",sep=""))
        print(end - start)
        #replace 0 by NA
        LFQ_protein_quant[LFQ_protein_quant==0] <- NA

        return(LFQ_protein_quant)
      }

      if(ncol(feature_with_background_intensity) <= 10)num_ratio_samples <- NA
      if(ncol(feature_with_background_intensity) > 10)num_ratio_samples <- 6

      if(calc_peptide_LFQ == F)
      {
        LFQ_quant_with_background <- LFQ_protein_quant_process(features,feature_with_background_intensity,n_cores,label="Perform LFQ-quantification for non-imputed data",num_ratio_samples = num_ratio_samples)
        LFQ_quant_with_background_imputed <- LFQ_protein_quant_process(features,feature_with_background_intensity_imputed,n_cores,label="Perform LFQ-quantification for imputed data",num_ratio_samples = num_ratio_samples)
      }else ###Use peptide LFQ for calcualting protein LFQ
      {
        LFQ_quant_with_background <- LFQ_protein_quant_process(LFQ_peptide_quant_with_background[,c(1:4)],LFQ_peptide_quant_with_background[,c(5:ncol(LFQ_peptide_quant_with_background))],n_cores,label="Perform LFQ-quantification for non-imputed data",num_ratio_samples = num_ratio_samples)
        LFQ_quant_with_background_imputed <- LFQ_protein_quant_process(LFQ_peptide_quant_with_background_imputed[,c(1:4)],LFQ_peptide_quant_with_background_imputed[,c(5:ncol(LFQ_peptide_quant_with_background_imputed))],n_cores,label="Perform LFQ-quantification for imputed data",num_ratio_samples = num_ratio_samples)
      }
    }

    #Perform Top3 and Total quantification
    cl <- parallel::makeCluster(ifelse(n_cores < 4,n_cores,4))#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
    doParallel::registerDoParallel(cl)
    res <- foreach::foreach(i=1:4) %dopar%
      {
        if(i == 1)
        {
          ###Perform Top3 protein quantification
          res <- Top3_Protein_Quant(features = features,feature_sample_matrix_requantified = feature_with_background_intensity,Quant_pvals=pval_signal_with_background_quant,S2B=S2B,Alignment_scores = alignment_scores_peaks_correct,use_overlapping = T,min_peps = 1)
        }
        if(i == 2)
        {
          ###Perform Total protein quantification
          res <- Total_Protein_Quant(features = features,feature_sample_matrix_requantified = feature_with_background_intensity,Quant_pvals=pval_signal_with_background_quant,S2B=S2B,Alignment_scores = alignment_scores_peaks_correct,use_overlapping = T,min_peps = 1)
        }
        if(i == 3)
        {
          ###Perform Top3 protein quantification for imputed data
          res <- Top3_Protein_Quant(features = features,feature_sample_matrix_requantified = feature_with_background_intensity_imputed,Quant_pvals=pval_signal_with_background_quant,Alignment_scores = alignment_scores_peaks_correct,S2B=S2B,use_overlapping = T,min_peps = 1)
        }
        if(i == 4)
        {
          ###Perform Total protein quantification for imputed data
          res <- Total_Protein_Quant(features = features,feature_sample_matrix_requantified = feature_with_background_intensity_imputed,Quant_pvals=pval_signal_with_background_quant,Alignment_scores = alignment_scores_peaks_correct,S2B=S2B,use_overlapping = T,min_peps = 1)
        }
        return(res)
      }
    parallel::stopCluster(cl)

    Top3_quant_with_background <- res[[1]]
    Total_quant_with_background <- res[[2]]
    Top3_quant_with_background_imputed <- res[[3]]
    Total_quant_with_background_imputed <- res[[4]]

    ###Match Gene names to Uniprot Identifier
    if(any(colnames(MaxQ_protein_groups) == "Gene.names") & any(colnames(MaxQ_protein_groups) == "Protein.IDs"))
    {
      temp <- MaxQ_protein_groups[,c("Gene.names","Protein.IDs")]
      temp <- temp[which(temp$Gene.names != ""),]

      UniProt_to_GeneName <- base::data.frame(UniProt_ID=unique(as.character(stringr::str_split(temp$Protein.IDs,";",simplify = T))),Gene_Name="")
      UniProt_to_GeneName$Gene_Name <- as.character(UniProt_to_GeneName$Gene_Name)

      for(i in 1:nrow(UniProt_to_GeneName))
      {
        UniProt_to_GeneName$Gene_Name[i] <- as.character(temp$Gene.names[which(grepl(UniProt_to_GeneName$UniProt_ID[i],temp$Protein.IDs))])
      }

      Top3_quant_with_background <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(Top3_quant_with_background),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(Top3_quant_with_background),Top3_quant_with_background)
      rownames(Top3_quant_with_background) <- c()

      Total_quant_with_background <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(Total_quant_with_background),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(Total_quant_with_background),Total_quant_with_background)
      rownames(Total_quant_with_background) <- c()

      Top3_quant_with_background_imputed <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(Top3_quant_with_background_imputed),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(Top3_quant_with_background_imputed),Top3_quant_with_background_imputed)
      rownames(Top3_quant_with_background_imputed) <- c()

      Total_quant_with_background_imputed <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(Total_quant_with_background_imputed),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(Total_quant_with_background_imputed),Total_quant_with_background_imputed)
      rownames(Total_quant_with_background_imputed) <- c()

      if(calc_peptide_LFQ == T)
      {
        LFQ_peptide_quant_with_background <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(LFQ_peptide_quant_with_background$Protein,UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=LFQ_peptide_quant_with_background$Protein,LFQ_peptide_quant_with_background[,c(1,2,4,5:ncol(LFQ_peptide_quant_with_background))])
        rownames(LFQ_peptide_quant_with_background) <- c()
        colnames(LFQ_peptide_quant_with_background) <- base::gsub("^X","",colnames(LFQ_peptide_quant_with_background))

        LFQ_peptide_quant_with_background_imputed <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(LFQ_peptide_quant_with_background_imputed$Protein,UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=LFQ_peptide_quant_with_background_imputed$Protein,LFQ_peptide_quant_with_background_imputed[,c(1,2,4,5:ncol(LFQ_peptide_quant_with_background_imputed))])
        rownames(LFQ_peptide_quant_with_background_imputed) <- c()
        colnames(LFQ_peptide_quant_with_background_imputed) <- base::gsub("^X","",colnames(LFQ_peptide_quant_with_background_imputed))
      }

      if(calc_protein_LFQ == T)
      {
        LFQ_quant_with_background <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(LFQ_quant_with_background),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(LFQ_quant_with_background),LFQ_quant_with_background)
        rownames(LFQ_quant_with_background) <- c()
        colnames(LFQ_quant_with_background) <- base::gsub("^X","",colnames(LFQ_quant_with_background))

        LFQ_quant_with_background_imputed <- base::data.frame(Gene_Name=UniProt_to_GeneName$Gene_Name[match(rownames(LFQ_quant_with_background_imputed),UniProt_to_GeneName$UniProt_ID)],UniProt_Identifier=rownames(LFQ_quant_with_background_imputed),LFQ_quant_with_background_imputed)
        rownames(LFQ_quant_with_background_imputed) <- c()
        colnames(LFQ_quant_with_background_imputed) <- base::gsub("^X","",colnames(LFQ_quant_with_background_imputed))
      }

    }

    if(calc_protein_LFQ == T)
    {
      utils::write.table(x = LFQ_quant_with_background,file = base::paste(path_to_features,"/Proteins_quantification_LFQ",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
      utils::write.table(x = LFQ_quant_with_background_imputed,file = base::paste(path_to_features,"/Proteins_quantification_LFQ_imputed",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
    }
    utils::write.table(x = Top3_quant_with_background,file = base::paste(path_to_features,"/Proteins_quantification_Top3",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
    utils::write.table(x = Top3_quant_with_background_imputed,file = base::paste(path_to_features,"/Proteins_quantification_Top3_imputed",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
    utils::write.table(x = Total_quant_with_background,file = base::paste(path_to_features,"/Proteins_quantification_Total",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
    utils::write.table(x = Total_quant_with_background_imputed,file = base::paste(path_to_features,"/Proteins_quantification_Total_imputed",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")

    if(calc_peptide_LFQ == T)
    {
      utils::write.table(x = LFQ_peptide_quant_with_background,file = base::paste(path_to_features,"/Peptides_quantification_LFQ",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
      utils::write.table(x = LFQ_peptide_quant_with_background_imputed,file = base::paste(path_to_features,"/Peptides_quantification_LFQ_imputed",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
    }

    print(paste0(Sys.time()," Protein-level aggregation finished"))

  }

  setwd(path_to_features)

  ###finally save feature level quantification
  print(paste0(Sys.time()," Store all results"))

  utils::write.table(x = features,file = base::paste(path_to_features,"/Features",output_file_names_add,".tab",sep=""),row.names = F,sep = "\t")
  utils::write.table(x = feature_with_background_intensity,file = base::paste(path_to_features,"/Features_quantification",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = feature_with_background_intensity_imputed,file = base::paste(path_to_features,"/Features_quantification_imputed",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = pval_signal_with_background_quant,file = base::paste(path_to_features,"/Features_quantification_pvals",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = Ioncount_feature_with_background_intensity,file = base::paste(path_to_features,"/Features_quantification_ioncount",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = S2B,file = base::paste(path_to_features,"/Features_quantification_S2B",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = alignment_variability_score,file = base::paste(path_to_features,"/Features_quantification_variability_score",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = alignment_scores_peaks_correct,file = base::paste(path_to_features,"/Features_quantification_alignment_score",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")
  utils::write.table(x = mono_iso_alignment_summary,file = base::paste(path_to_features,"/Features_quantification_mono_iso_alignment_score",output_file_names_add,".tab",sep=""),row.names = T,sep = "\t")

  save(QC_data,file = base::paste("Temporary_files/Feature_quantification_QC_data.RData",sep=""))
  options(warn=0)
}


#' Peak decision algorithm only designed for internal use
#' @param features_select Features on which peak-selection should be performed
#' @param peak_quant List of determined peaks per feature
#' @param samples String vector of all samples
#' @param s Integer indicating index of current sample
#' @param RT_correction_factors RT corrections
#' @param mz_correction_factors mz corrections
#' @param features_intensity_sample features_intensity_sample
#' @param Ioncount_sample Ioncount_sample
#' @param feature_with_background_intensity_sample feature_with_background_intensity_sample
#' @param Ioncount_with_background_sample Ioncount_with_background_sample
#' @param peak_selected_sample peak_selected_sample
#' @param delta_mz delta_mz
#' @param delta_rt delta_rt
#' @param peak_min_ion_count peak_min_ion_count
#' @param chunk chunk
#' @param num_chunks num_chunks
#' @param progress Progressbar
#' @import data.table
#' @export
#' @details Peak decision algorithm
peak_decision <- function(features_select,peak_quant,samples,s,RT_correction_factors,mz_correction_factors,features_intensity_sample,Ioncount_sample,feature_with_background_intensity_sample,Ioncount_with_background_sample,peak_selected_sample,delta_mz,delta_rt,peak_min_ion_count,chunk=NULL,num_chunks=NULL,progress=T)
{
  #prevent issues during R CMD check
  ..s <- s
  rm(..s)
  ..known_peaks_indices <- 1
  rm(..known_peaks_indices)
  ..o <- 1
  rm(..o)

  #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
  #suppressWarnings(suppressMessages(library(lubridate,quietly = T)))
  if(progress == T)
  {
    pb <- tcltk::tkProgressBar(title = base::paste("Prepare for peak selection -",samples[s]),label=base::paste( round(0/1*100, 0),"% done"), min = 0,max = 1, width = 300)
    close(pb)
  }

  ###next, decide for all other feature quantifications for which at least 1 peak was available which peak quantification should be used
  max <- nrow(features_select)

  if(progress == T)
  {
    if(!is.null(chunk) & !is.null(num_chunks))
    {
      pb <- tcltk::tkProgressBar(title = base::paste("Select peaks (",chunk,"/",num_chunks,") - ",samples[s],sep=""),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    }else
    {
      pb <- tcltk::tkProgressBar(title = base::paste("Select peaks -",samples[s]),label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    }
  }

  start_time <- Sys.time()
  updatecounter <- 0
  time_require <- 0

  for(i in 1:nrow(features_select))
  {
    num_detected_peaks <- ifelse(!is.na(peak_quant$Standard$num_peaks_with_background[i,..s]),as.numeric(peak_quant$Standard$num_peaks_with_background[i,..s]),
                                 ifelse(!is.na(peak_quant$Peak_1$num_peaks_with_background[i,..s]),as.numeric(peak_quant$Peak_1$num_peaks_with_background[i,..s]),NA))
    if(!is.na(num_detected_peaks)) ##peaks detected?
    {
      ###check if more than 1 peak was detected and true peak is unknown
      if(num_detected_peaks > 1)
      {
        # if(!is.na(peak_quant$Peak_1$correct_peak_with_background[i,..s]) & peak_quant$Peak_1$correct_peak_with_background[i,..s] == 1) ###correct peak is known so take the closest peak
        # {
        #   ###already done in batch before
        #   ###so nothing to be done here
        #
        #   # data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$features_intensity[i,..s]))
        #   # data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$Ioncount_feature_sample_matrix[i,..s]))
        #   # data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$feature_with_background_intensity[i,..s]))
        #   # data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$Ioncount_feature_with_background_intensity[i,..s]))
        #   #
        #   # data.table::set(dT1_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$dT1[i,..s]))
        #   # data.table::set(dM1_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Peak_1$dM1[i,..s]))
        #
        # }
        if(peak_quant$Peak_1$correct_peak_with_background[i,..s] == 0) ###more than 1 peaks but true peak not known
        {
          ###check if for any sample the correct peak is known
          num_known_peaks <- length(which(peak_quant$Peak_1$correct_peak_with_background[i,] != 0))
          if(num_known_peaks > 0)
          {
            ###now check where correct known peak usually is located in samples were true location is known
            known_peaks_indices <- which(peak_quant$Peak_1$correct_peak_with_background[i,] == 1)

            RT_in_known <- as.numeric(peak_quant$Peak_1$Peak_rt_with_background[i,..known_peaks_indices])
            mz_in_known <- as.numeric(peak_quant$Peak_1$Peak_mz_with_background[i,..known_peaks_indices])

            ###get RT and mz for detected peaks in current sample
            detected_peaks_rt <- vector("numeric",as.numeric(num_detected_peaks))
            detected_peaks_mz <- vector("numeric",as.numeric(num_detected_peaks))

            names(detected_peaks_rt) <- 1:as.numeric(num_detected_peaks)
            names(detected_peaks_mz) <- 1:as.numeric(num_detected_peaks)

            for(p in 1:length(detected_peaks_rt))
            {
              detected_peaks_rt[p] <- as.numeric(peak_quant[[p]]$Peak_rt_with_background[i,..s])
              detected_peaks_mz[p] <- as.numeric(peak_quant[[p]]$Peak_mz_with_background[i,..s])
            }

            if(length(which(!is.na(detected_peaks_rt) & !is.na(detected_peaks_mz))) > 0)
            {
              detected_peaks_rt <- detected_peaks_rt[which(!is.na(detected_peaks_rt))]
              detected_peaks_mz <- detected_peaks_mz[which(!is.na(detected_peaks_mz))]

              ###now compare observed deviations in RT and mz for known peaks with the deviation for the potential peaks in current sample
              ###for this comparison we have to correct detected peaks RT and m/z with the RT and mz correction factors per corresponding sample
              ###otherwise it would be possible that current sample shows e.g. a global RT shift which could result in a major deviation from all other known samples
              ###thus a peak wouldn?t be selected although it e.g. perfectly lies close to the expected window
              RT_in_known_corrected <- as.numeric(RT_in_known - RT_correction_factors[i,known_peaks_indices])
              mz_in_known_corrected <- as.numeric(mz_in_known - mz_correction_factors[i,known_peaks_indices])

              detected_peaks_rt_corrected <- as.numeric(detected_peaks_rt - RT_correction_factors[i,s])
              detected_peaks_mz_corrected <- as.numeric(detected_peaks_mz - mz_correction_factors[i,s])
              names(detected_peaks_rt_corrected) <- names(detected_peaks_rt)
              names(detected_peaks_mz_corrected) <- names(detected_peaks_mz)
              ###check if the detected RT and mz of peaks are likely to be belonging to the same population (compared to known peaks)
              ###99 % of observed data points lie within 2*sd range --> if delta RT or delta mZ > 2*sd then most likely this is the wrong peak
              ###so we assume all peaks within this RT and mz deviation to be valid peak candidates
              ###if these deviations are smaller then the expected RT and mz window, we expand these acceptance criteria accordingly
              delta_RT_cut <- 3*stats::sd(RT_in_known_corrected,na.rm=T)
              if(is.na(delta_RT_cut) | delta_RT_cut < 2*delta_rt)delta_RT_cut <- 2*delta_rt
              delta_mz_cut <- 3*stats::sd(mz_in_known_corrected,na.rm=T)
              if(is.na(delta_mz_cut) | delta_mz_cut < 3*delta_mz)delta_mz_cut <- 3*delta_mz

              within_range <- ifelse(abs(detected_peaks_rt_corrected-mean(RT_in_known_corrected,na.rm=T)) <= delta_RT_cut &
                                       abs(detected_peaks_mz_corrected-mean(mz_in_known_corrected,na.rm=T)) <= delta_mz_cut,T,F)

              if(any(within_range))
              {
                detected_peaks_rt <- detected_peaks_rt[within_range]
                detected_peaks_mz <- detected_peaks_mz[within_range]
                detected_peaks_rt_corrected <- detected_peaks_rt_corrected[within_range]
                detected_peaks_mz_corrected <- detected_peaks_mz_corrected[within_range]

                ###now check that at the position of the potential peak no other peak is present in samples with known peak
                count_other_peaks <- sum(peak_quant$Peak_1$num_peaks_with_background[i,..known_peaks_indices]-1)
                if(count_other_peaks > 0)
                {
                  other_peaks <- base::as.data.frame(matrix(ncol=6,nrow=count_other_peaks,0))
                  counter <- 0

                  for(o in known_peaks_indices) ###collect peak data for all additional peaks in samples were peak was known
                  {
                    cur_peak_count <- as.numeric(peak_quant$Peak_1$num_peaks_with_background[i,..o]-1)
                    if(cur_peak_count > 0)
                    {
                      for(p in 1:cur_peak_count)
                      {
                        counter <- counter + 1
                        temp <- c(as.numeric(peak_quant[[p+1]]$Peak_mz_with_background[i,..o] - mz_correction_factors[i,o]),
                                  as.numeric(peak_quant[[p+1]]$Peak_rt_with_background[i,..o] - RT_correction_factors[i,o]),
                                  as.numeric(peak_quant[[p+1]]$feature_with_background_intensity[i,..o]),
                                  as.numeric(peak_quant[[p+1]]$Ioncount_feature_with_background_intensity[i,..o]))
                        ##Add distance in RT and mz to known peak
                        temp[5:6] <- c(abs(temp[1] - as.numeric(peak_quant[[1]]$Peak_mz_with_background[i,..o] - mz_correction_factors[i,o])),
                                       abs(temp[2] - as.numeric(peak_quant[[1]]$Peak_rt_with_background[i,..o] - RT_correction_factors[i,o])))

                        data.table::set(other_peaks,as.integer(counter),as.integer(1:6),value=list(temp[1],temp[2],temp[3],temp[4],temp[5],temp[6]))
                      }
                    }
                  }
                  #disregard other peaks below significance threshold
                  other_peaks <- other_peaks[which(other_peaks$V4 > peak_min_ion_count),]
                  #disregard other peaks which are too close to the expected peak
                  #cutoff RT: > 1.1* half peak width -> half RT extraction window
                  #cutoff mz: > 1.1* delta_mz -> half mz extraction window
                  other_peaks <- other_peaks[which(other_peaks$V5 > delta_mz*1.1 | other_peaks$V6 > ((features_select$RT_length[i]/2)*1.1)),]
                  ###now check that selected peaks are not close to other peaks in sampels with known peak
                  overlap <- vector("logical",length(detected_peaks_rt))
                  for(o in 1:length(detected_peaks_rt))
                  {
                    overlap[o] <- ifelse(any(abs(detected_peaks_rt_corrected[o]-other_peaks$V2) <= (delta_rt/2) & abs(detected_peaks_mz_corrected[o]-other_peaks$V1) <= delta_mz/2,na.rm=T),T,F)
                  }
                }else
                {
                  overlap <- vector("logical",length(detected_peaks_rt))
                }

                if(any(overlap == F)) ###not overlapping with any other peak
                {
                  detected_peaks_rt <- detected_peaks_rt_corrected[which(overlap == F)]
                  detected_peaks_mz <- detected_peaks_mz_corrected[which(overlap == F)]
                  RT_in_known_other_peaks <- as.numeric(peak_quant$Peak_1$Peak_rt_with_background[i,..known_peaks_indices])
                  mz_in_known_other_peaks <- as.numeric(peak_quant$Peak_1$Peak_mz_with_background[i,..known_peaks_indices])

                  ###select the peak which is closest to all other known peaks

                  sum_delta_to_known_peaks_rt <- vector("numeric",length(detected_peaks_rt))
                  sum_delta_to_known_peaks_mz <- vector("numeric",length(detected_peaks_rt))
                  sum_delta_to_known_peaks <- vector("numeric",length(detected_peaks_rt))
                  for(p in 1:length(detected_peaks_rt))
                  {
                    sum_delta_to_known_peaks_rt[p] <- sum(detected_peaks_rt[p]-RT_in_known_corrected,na.rm=T)/length(RT_in_known_corrected)
                    sum_delta_to_known_peaks_mz[p] <- sum((detected_peaks_mz[p]-mz_in_known_corrected)*500,na.rm=T)/length(RT_in_known_corrected)
                    sum_delta_to_known_peaks[p] <- abs(sum_delta_to_known_peaks_rt[p])+abs(sum_delta_to_known_peaks_mz[p])
                  }

                  selected_peak <- which(!is.na(sum_delta_to_known_peaks) & sum_delta_to_known_peaks == min(sum_delta_to_known_peaks[which(!is.na(sum_delta_to_known_peaks))],na.rm=T))[1]
                  selected_peak <- as.numeric(names(detected_peaks_rt)[selected_peak])
                  data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$features_intensity[i,..s]))
                  data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$Ioncount_feature_sample_matrix[i,..s]))

                  data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$feature_with_background_intensity[i,..s]))
                  data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$Ioncount_feature_with_background_intensity[i,..s]))

                  data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(selected_peak))

                }else###none of the peaks is not overlapping with an other peak detected in other samples where correct peak was known
                {
                  data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
                  data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

                  data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
                  data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

                  data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
                }

              }else ###none of the peaks is within the accepted range thus use standard window
              {
                data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
                data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

                data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
                data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

                data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
              }
            }else ###none of the peaks is valid
            {
              data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
              data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

              data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
              data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

              data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
            }


          }else ###in no other sample the correct peak is known thus take total window
          {
            data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
            data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

            data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
            data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

            data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
          }
        }
      }else if(num_detected_peaks == 1) ###one peak was detected
      {
        ### check if correct peak is unknown
        if(peak_quant$Peak_1$correct_peak_with_background[i,..s] == 0)
        {
          ###if this is the case check if detected peak is comparable to correct peaks in other samples where correct peak was known
          ###check if for any sample the correct peak is known
          num_known_peaks <- length(which(peak_quant$Peak_1$correct_peak_with_background[i,] != 0))
          if(num_known_peaks > 0)
          {
            ###now check where correct known peak usually is located in samples were true location is known
            known_peaks_indices <- which(peak_quant$Peak_1$correct_peak_with_background[i,] == 1)

            RT_in_known <- as.numeric(peak_quant$Peak_1$Peak_rt_with_background[i,..known_peaks_indices])
            mz_in_known <- as.numeric(peak_quant$Peak_1$Peak_mz_with_background[i,..known_peaks_indices])

            detected_peaks_rt <- as.numeric(peak_quant[[1]]$Peak_rt_with_background[i,..s])
            detected_peaks_mz <- as.numeric(peak_quant[[1]]$Peak_mz_with_background[i,..s])
            if(length(which(!is.na(detected_peaks_rt) & !is.na(detected_peaks_mz))) > 0)
            {
              detected_peaks_rt <- detected_peaks_rt[which(!is.na(detected_peaks_rt))]
              detected_peaks_mz <- detected_peaks_mz[which(!is.na(detected_peaks_mz))]

              ###now compare observed deviations in RT and mz for known peaks with the deviation for the potential peaks in current sample
              ###for this comparison we have to correct detected peaks RT and m/z with the RT and mz correction factors per corresponding sample
              ###otherwise it would be possible that current sample shows e.g. a global RT shift which could result in a major deviation from all other known samples
              ###thus a peak wouldn?t be selected although it e.g. perfectly lies close to the expected window
              RT_in_known_corrected <- as.numeric(RT_in_known - RT_correction_factors[i,known_peaks_indices])
              mz_in_known_corrected <- as.numeric(mz_in_known - mz_correction_factors[i,known_peaks_indices])

              detected_peaks_rt_corrected <- as.numeric(detected_peaks_rt - RT_correction_factors[i,s])
              detected_peaks_mz_corrected <- as.numeric(detected_peaks_mz - mz_correction_factors[i,s])

              ###check if the detected RT and mz of peaks are likely to be belonging to the same population (compared to known peaks)
              ###99 % of observed data points lie within 3*sd range --> if delta RT or delta mZ > 2*sd then most likely this is the wrong peak
              ###so we assume all peaks within this RT and mz deviation to be valid peak candidates
              ###if these deviations are smaller then the expected RT and mz window, we expand these acceptance criteria accordingly
              delta_RT_cut <- 3*stats::sd(RT_in_known,na.rm=T)
              if(is.na(delta_RT_cut) | delta_RT_cut < 2*delta_rt)delta_RT_cut <- 2*delta_rt
              delta_mz_cut <- 3*stats::sd(mz_in_known,na.rm=T)
              if(is.na(delta_mz_cut) | delta_mz_cut < 3*delta_mz)delta_mz_cut <- 3*delta_mz

              within_range <- ifelse(abs(detected_peaks_rt_corrected-mean(RT_in_known_corrected,na.rm=T)) <= delta_RT_cut &
                                       abs(detected_peaks_mz_corrected-mean(mz_in_known_corrected,na.rm=T)) <= delta_mz_cut,T,F)

              if(any(within_range))
              {
                detected_peaks_rt <- detected_peaks_rt[within_range]
                detected_peaks_mz <- detected_peaks_mz[within_range]
                detected_peaks_rt_corrected <- detected_peaks_rt_corrected[within_range]
                detected_peaks_mz_corrected <- detected_peaks_mz_corrected[within_range]

                ###now check that at the position of the potential peak no other peak is present in samples with known peak
                count_other_peaks <- sum(peak_quant$Peak_1$num_peaks_with_background[i,..known_peaks_indices]-1)
                if(count_other_peaks > 0)
                {
                  other_peaks <- base::as.data.frame(matrix(ncol=6,nrow=count_other_peaks,0))
                  counter <- 0

                  for(o in known_peaks_indices) ###collect peak data for all additional peaks in samples were peak was known
                  {
                    cur_peak_count <- as.numeric(peak_quant$Peak_1$num_peaks_with_background[i,..o]-1)
                    if(cur_peak_count > 0)
                    {
                      for(p in 1:cur_peak_count)
                      {
                        counter <- counter + 1
                        temp <- c(as.numeric(peak_quant[[p+1]]$Peak_mz_with_background[i,..o] - mz_correction_factors[i,o]),
                                  as.numeric(peak_quant[[p+1]]$Peak_rt_with_background[i,..o] - RT_correction_factors[i,o]),
                                  as.numeric(peak_quant[[p+1]]$feature_with_background_intensity[i,..o]),
                                  as.numeric(peak_quant[[p+1]]$Ioncount_feature_with_background_intensity[i,..o]))
                        ##Add distance in RT and mz to known peak
                        temp[5:6] <- c(abs(temp[1] - as.numeric(peak_quant[[1]]$Peak_mz_with_background[i,..o] - mz_correction_factors[i,o])),
                                       abs(temp[2] - as.numeric(peak_quant[[1]]$Peak_rt_with_background[i,..o] - RT_correction_factors[i,o])))
                      }
                    }
                  }
                  #disregard other peaks below significance threshold
                  other_peaks <- other_peaks[which(other_peaks$V4 > peak_min_ion_count),]
                  #disregard other peaks which are too close to the expected peak
                  #cutoff RT: > 1.1* half peak width -> half RT extraction window
                  #cutoff mz: > 1.1* delta_mz -> half mz extraction window
                  other_peaks <- other_peaks[which(other_peaks$V5 > delta_mz*1.1 | other_peaks$V6 > ((features_select$RT_length[i]/2)*1.1)),]
                  ###now check that selected peaks are not close to other peaks in sampels with known peak

                  overlap <- ifelse(any(abs(detected_peaks_rt_corrected-other_peaks$V2) <= delta_rt/2 & abs(detected_peaks_mz_corrected-other_peaks$V1) <= delta_mz/2,na.rm=T),T,F)
                }else
                {
                  overlap <- F
                }

                if(any(overlap == F)) ###not overlapping with any other peak
                {
                  selected_peak <- 1

                  data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$features_intensity[i,..s]))
                  data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$Ioncount_feature_sample_matrix[i,..s]))

                  data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$feature_with_background_intensity[i,..s]))
                  data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant[[selected_peak]]$Ioncount_feature_with_background_intensity[i,..s]))

                  data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(selected_peak))
                }else###the peak is overlapping with an other peak detected in other samples where correct peak was known
                {
                  data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
                  data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

                  data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
                  data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

                  data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
                }
              }else ###none of the peaks is within the accepted range thus use standard window
              {
                data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
                data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

                data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
                data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

                data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
              }
            }else ###no peak was valid
            {
              data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
              data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

              data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
              data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

              data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
            }
          }else ###in no other sample the correct peak was known thus use the standard window
          {
            data.table::set(features_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$features_intensity[i,..s]))
            data.table::set(Ioncount_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_sample_matrix[i,..s]))

            data.table::set(feature_with_background_intensity_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$feature_with_background_intensity[i,..s]))
            data.table::set(Ioncount_with_background_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(peak_quant$Standard$Ioncount_feature_with_background_intensity[i,..s]))

            data.table::set(peak_selected_sample,i = as.integer(i),j = as.integer(1),value = as.numeric(length(peak_quant)))
          }


        }


      }
    }

    updatecounter <- updatecounter + 1
    if(updatecounter >= 10 & progress == T)
    {
      time_elapsed <- difftime(Sys.time(),start_time,units="secs")
      time_require <- (time_elapsed/(i/max))*(1-(i/max))
      td <- lubridate::seconds_to_period(time_require)
      time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

      updatecounter <- 0
      tcltk::setTkProgressBar(pb, i, label=base::paste( round(i/max*100, 0)," % done (",i,"/",max,", Time require: ",time_require,")",sep = ""))
    }

  }
  if(progress == T)
  {
    close(pb)
  }

  return(list(features_intensity_sample=features_intensity_sample,
              Ioncount_sample=Ioncount_sample,
              feature_with_background_intensity_sample=feature_with_background_intensity_sample,
              Ioncount_with_background_sample=Ioncount_with_background_sample,
              peak_selected_sample=peak_selected_sample))
}

#' Peak selection FDR algorithm for internal use
#' @param num_features num_features
#' @param features features
#' @param samples samples
#' @param peaks peaks
#' @param path_to_features path_to_features
#' @param peak_quant peak_quant
#' @param feature_with_background_intensity feature_with_background_intensity
#' @param peak_selected peak_selected
#' @param delta_mz delta_mz
#' @param delta_rt delta_rt
#' @param peak_min_ion_count peak_min_ion_count
#' @param num_peaks_store num_peaks_store
#' @param alignment_scores_cutoff alignment_scores_cutoff
#' @param n_cores n_cores
#' @param peak_decision peak_decision
#' @param Alignment_scoring Alignment_scoring
#' @param seed seed
#' @param plot plot
#' @import data.table
#' @import foreach
#' @import randomForest
#' @export
#' @details Peak selection FDR algorithm for internal use
Peak_selection_FDR <- function(num_features=500,features,samples,peaks,path_to_features,peak_quant,feature_with_background_intensity,peak_selected,delta_mz,delta_rt,peak_min_ion_count,num_peaks_store,alignment_scores_cutoff,n_cores,peak_decision,Alignment_scoring,seed=110519,plot=T)
{
  if(length(which(peaks$known == 1)) >= 10)
  {
    #suppressWarnings(suppressMessages(library(data.table,quietly = T)))
    #suppressWarnings(suppressMessages(library(randomForest,quietly = T)))
    #suppressWarnings(suppressMessages(library(mgcv,quietly = T)))
    #select features for FDR estimation per sample
    if(!is.na(seed))set.seed(seed)
    temp_peaks <- peaks[which(peaks$peak == peaks$selected & peaks$known == 1 & !grepl("_i|_d",peaks$feature)),]
    selection_feature <- sample(unique(temp_peaks$feature),length(unique(temp_peaks$feature)),replace = F)
    selection_feature <- match(selection_feature,features$Feature_name)
    selection_feature_per_sample <- list()

    for(s in 1:length(samples))
    {
      temp_peaks_2 <- temp_peaks[which(temp_peaks$sample == samples[s]),]
      temp_peaks_2 <- temp_peaks_2[match(features$Feature_name[selection_feature],temp_peaks_2$feature),]
      sel <- which(!is.na(temp_peaks_2$known))[1:num_features]
      if(any(is.na(sel)))sel <- sel[which(!is.na(sel))]
      selection_feature_per_sample[[s]] <- selection_feature[sel]
    }

    #remove known peak location for selected features and samples and replace with predicted corrections based on models
    features_select_FDR <- list()
    features$Observed_mz <- as.character(features$Observed_mz)
    features$Observed_RT <- as.character(features$Observed_RT)

    setwd(base::paste(path_to_features,"/Temporary_files",sep=""))
    e <- new.env()
    load("Feature_alignment_QC_data.RData",envir = e)
    median_feature_properties <- e$QC_data$mz_calibration_median_feature_properties
    mz_correction_models <- e$QC_data$mz_calibration_models
    RT_alignment_GAM_models <- e$QC_data$RT_calibration_GAM_models

    for(s in 1:length(samples))
    {
      temp <- data.table::copy(features)
      temp <- temp[selection_feature_per_sample[[s]],]
      if(nrow(temp)>10)
      {
        #remove observed RT and mz
        obs_rt <- (stringr::str_split(temp$Observed_RT,";",simplify = T))
        obs_rt[,s] <- "NA"
        obs_mz <- (stringr::str_split(temp$Observed_mz,";",simplify = T))
        obs_mz[,s] <- "NA"
        temp$Observed_mz <- apply(obs_mz,1,base::paste,collapse = ";")
        #exchange mz calibration with prediction from model
        select_model <- which(names(mz_correction_models) == samples[s])

        features_select <- selection_feature_per_sample[[s]]

        temp_data <- median_feature_properties[match(features_select,median_feature_properties$Feature),]

        temp_data$Resolution <- 0

        temp_data <- temp_data[which(rowSums(is.na(temp_data[,c("Retention.time","m.z","Charge","Resolution")])) == 0),]

        if(length(select_model)>0)
        {
          if(nrow(temp_data) > 0)
          {
            prediction <- stats::predict(mz_correction_models[[select_model]], temp_data[,-1], type = "response")
            if(any(is.na(prediction)))prediction[which(is.na(prediction))] <- 0

            data.table::set(temp,as.integer(match(rownames(temp_data),rownames(temp))),as.integer(which(colnames(temp) == base::paste("mz_calibration.",samples[s],sep=""))),prediction)
          }else
          {
            data.table::set(temp,as.integer(match(rownames(temp_data),rownames(temp))),as.integer(which(colnames(temp) == base::paste("mz_calibration.",samples[s],sep=""))),0)
          }
        }else
        {
          data.table::set(temp,as.integer(match(rownames(temp_data),rownames(temp))),as.integer(which(grepl("mz_calibration.",colnames(temp)))[s]),0)
        }
        #exchange RT calibration with prediction from model
        select_model <- s
        if(!is.null(RT_alignment_GAM_models[[select_model]]))
        {
          prediction <- stats::predict(RT_alignment_GAM_models[[select_model]], base::data.frame(x = temp$RT))
          if(any(is.na(prediction)))prediction[which(is.na(prediction))] <- 0
          data.table::set(temp,as.integer(1:nrow(temp)),as.integer(which(colnames(temp) == base::paste("RT_calibration.",samples[s],sep=""))),prediction)

        }else
        {
          data.table::set(temp,as.integer(1:nrow(temp)),as.integer(which(colnames(temp) == base::paste("RT_calibration.",samples[s],sep=""))),0)
        }

        features_select_FDR[[s]] <- temp
      }else
      {
        features_select_FDR[[s]] <- temp
      }
    }

    #prepare columns containing correction information
    indices_RT_correction <- which(grepl("RT_calibration",colnames(features)))
    indices_mz_correction <- which(grepl("mz_calibration",colnames(features)))
    ordering_indices <- match(base::gsub("-",".",samples),base::gsub("RT_calibration\\.","",colnames(features)[indices_RT_correction]))
    indices_RT_correction <- indices_RT_correction[ordering_indices]
    indices_mz_correction <- indices_mz_correction[ordering_indices]

    #reduce peak_quant information to only cover features selected for FDR calculation

    #prepare only required peak quant data
    peak_quant_reduced <- data.table::copy(peak_quant)
    all_selected_features <- unique(unlist(selection_feature_per_sample))

    for(p in 1:(num_peaks_store+1))
    {
      for(p2 in 1:length(peak_quant_reduced[[p]]))
      {
        peak_quant_reduced[[p]][[p2]] <- peak_quant_reduced[[p]][[p2]][all_selected_features,]
      }
    }

    #suppressWarnings(suppressMessages(library(doSNOW,quietly = T)))

    cl <- snow::makeCluster(n_cores)#as.numeric(Sys.getenv('NUMBER_OF_PROCESSORS')))
    doSNOW::registerDoSNOW(cl)

    start <- Sys.time()

    res <- foreach::foreach(s=1:length(samples)) %dopar%
      {
        #suppressWarnings(suppressMessages(library(data.table,quietly = T)))

        selection_feature <- selection_feature_per_sample[[s]]
        if(length(selection_feature)>0)
        {
          max <- 1
          pb <- tcltk::tkProgressBar(title = "Evaluate peak selection FDR",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)

          features_select_FDR_cur <- features_select_FDR[[s]]
          #now perform peak decision for FDR_features and indicate if same peak was selected as before
          peak_decision_same_without_peak_known <- vector("logical",length(selection_feature))

          RT_correction_factors <- features_select_FDR_cur[,indices_RT_correction]
          mz_correction_factors <- features_select_FDR_cur[,indices_mz_correction]
          colnames(RT_correction_factors) <- samples
          rownames(RT_correction_factors) <- features_select_FDR_cur$Feature_name
          colnames(mz_correction_factors) <- samples
          rownames(mz_correction_factors) <- features_select_FDR_cur$Feature_name

          feature_with_background_intensity_select_FDR <- data.table::copy(feature_with_background_intensity)
          peak_selected_select_FDR <- data.table::copy(peak_selected)

          temp_dummy_df <-  data.table::data.table(Name=rep(0L,length(selection_feature)))
          colnames(temp_dummy_df) <- samples[s]
          close(pb)

          peak_quant_temp <- data.table::copy(peak_quant_reduced)

          for(p in 1:(num_peaks_store+1))
          {
            for(p2 in 1:length(peak_quant_temp[[p]]))
            {
              peak_quant_temp[[p]][[p2]] <- peak_quant_temp[[p]][[p2]][match(selection_feature,all_selected_features),]
            }
          }

          peak_quant_temp$Peak_1$correct_peak_with_background[,s] <- 0L

          res_temp <- peak_decision(features_select = features_select_FDR_cur,
                                    peak_quant = peak_quant_temp,
                                    samples = samples,
                                    s = s,
                                    RT_correction_factors = RT_correction_factors,
                                    mz_correction_factors = mz_correction_factors,
                                    features_intensity_sample = temp_dummy_df,
                                    Ioncount_sample = temp_dummy_df,
                                    feature_with_background_intensity_sample = feature_with_background_intensity_select_FDR[selection_feature,s,drop=F],
                                    Ioncount_with_background_sample = temp_dummy_df,
                                    peak_selected_sample = peak_selected_select_FDR[selection_feature,s,drop=F],
                                    delta_mz = delta_mz,
                                    delta_rt = delta_rt,
                                    peak_min_ion_count = peak_min_ion_count,
                                    progress = T)

          peak_decision_same_without_peak_known <- res_temp$peak_selected_sample[,1] == peak_selected[selection_feature,s]

          data.table::set(feature_with_background_intensity_select_FDR,as.integer(selection_feature),as.integer(s),res_temp$feature_with_background_intensity_sample)
          data.table::set(peak_selected_select_FDR,as.integer(selection_feature),as.integer(s),res_temp$peak_selected_sample)

          names(peak_decision_same_without_peak_known) <- base::paste(features_select_FDR_cur$Feature_name,"_Sample_",s,sep="")

          return(list(peak_decision_same_without_peak_known=peak_decision_same_without_peak_known,
                      feature_with_background_intensity_select_FDR=feature_with_background_intensity_select_FDR,
                      peak_selected_select_FDR=peak_selected_select_FDR))
        }else
        {
          return(NULL)
        }

      }
    snow::stopCluster(cl)


    #determine how many false selections show large intensity difference and would not be removed by alignment scoring
    results_peak_selection_FDR_all <- list()
    max <- length(samples)
    pb <- tcltk::tkProgressBar(title="Evaluate peak selection FDR",label=base::paste( round(0/max*100, 0),"% done"), min = 0,max = max, width = 300)
    start_time <- Sys.time()
    updatecounter <- 0
    time_require <- 0

    for(s in 1:length(samples))
    {
      if(!is.null(res[[s]]))
      {
        peak_decision_same_without_peak_known <- res[[s]]$peak_decision_same_without_peak_known
        peak_selected_select_FDR <- res[[s]]$peak_selected_select_FDR
        feature_with_background_intensity_select_FDR <- res[[s]]$feature_with_background_intensity_select_FDR
        selection_feature <- selection_feature_per_sample[[s]]
        if(length(which(peak_decision_same_without_peak_known == F))>0L)
        {
          selections_all <- 1:length(peak_decision_same_without_peak_known)

          #compare quantification differences for all tests
          compare_quant_results_for_wrong_decisions <- base::as.data.frame(matrix(ncol=2,nrow=length(selections_all)))
          colnames(compare_quant_results_for_wrong_decisions) <- c("correct","wrong")
          rownames(compare_quant_results_for_wrong_decisions) <- features_select_FDR[[s]]$Feature_name[selections_all]
          compare_quant_results_for_wrong_decisions$correct <- as.numeric(compare_quant_results_for_wrong_decisions$correct)
          compare_quant_results_for_wrong_decisions$wrong <- as.numeric(compare_quant_results_for_wrong_decisions$wrong)
          for(i in selections_all)
          {
            ind <- i
            data.table::set(compare_quant_results_for_wrong_decisions,as.integer(ind),as.integer(1:2),list(base::log2(10^feature_with_background_intensity_select_FDR[selection_feature[i],s]),
                                                                                                           base::log2(10^feature_with_background_intensity[selection_feature[i],s])))
          }
          ##compare intensities of unbiased peak selection and correct peak
          #determine distribution of quantification deviations between correct and unbiased selected peak
          if(plot==T)
          {
            # plot(density(compare_quant_results_for_wrong_decisions$wrong-compare_quant_results_for_wrong_decisions$correct,na.rm=T),xlab="Intensity-masked / Intensity-true, log2",main=base::paste(samples[s],"- Deviation in selected peak quantification"))
            # graphics::abline(v=0)
            # graphics::abline(v=-1,lty=2,col="red")
            # graphics::abline(v=1,lty=2,col="red")

            graphics::plot(compare_quant_results_for_wrong_decisions$correct,compare_quant_results_for_wrong_decisions$wrong,xlab="True peak quantification, log2",ylab="Masked peak quantification, log2",main=base::paste(samples[s],"- Error in quantification"))
            graphics::abline(a=0,b=1)
            graphics::abline(a=1,b=1,lty=2,col="red")
            graphics::abline(a=-1,b=1,lty=2,col="red")
          }

          #how many show deviation > 2 fold
          wrong_peak_intensity_outlier <- abs(compare_quant_results_for_wrong_decisions$wrong-compare_quant_results_for_wrong_decisions$correct) > 1

          #finally visualize results
          freq <- plyr::count(peak_decision_same_without_peak_known)
          if(length(which(freq$x == F)) == 0)
          {
            freq <- rbind(freq,base::data.frame(x=F,freq=0))
          }
          if(length(which(freq$x == T)) == 0)
          {
            freq <- rbind(freq,base::data.frame(x=T,freq=0))
          }
          freq$rel <- freq$freq/sum(freq$freq)*100

          plot_data <- c(freq$rel[which(freq$x==F)],
                         length(which(wrong_peak_intensity_outlier == T))/sum(freq$freq)*100)
          names(plot_data) <- c("Total",">2-fold intensity difference")

          results_peak_selection_FDR <- list(peak_decision_same_without_peak_known=peak_decision_same_without_peak_known,
                                             compare_quant_results_for_wrong_decisions=compare_quant_results_for_wrong_decisions,
                                             wrong_peak_intensity_outlier=wrong_peak_intensity_outlier,
                                             plot_data=plot_data)

        }else
        {
          freq <- plyr::count(peak_decision_same_without_peak_known)
          if(length(which(freq$x == F)) == 0)
          {
            freq <- rbind(freq,base::data.frame(x=F,freq=0))
          }
          if(length(which(freq$x == T)) == 0)
          {
            freq <- rbind(freq,base::data.frame(x=T,freq=0))
          }
          freq$rel <- freq$freq/sum(freq$freq)*100

          plot_data <- c(freq$rel[which(freq$x==F)],
                         0/sum(freq$freq)*100)
          names(plot_data) <- c("Total",">2-fold intensity difference")


          results_peak_selection_FDR <- list(peak_decision_same_without_peak_known=peak_decision_same_without_peak_known,
                                             compare_quant_results_for_wrong_decisions=NULL,
                                             wrong_peak_intensity_outlier=NULL,
                                             plot_data=plot_data)
        }
        results_peak_selection_FDR_all[[s]] <- results_peak_selection_FDR
      }else
      {
        results_peak_selection_FDR_all[[s]] <- NULL
      }


      updatecounter <- updatecounter + 1
      if(updatecounter >= 1)
      {
        time_elapsed <- difftime(Sys.time(),start_time,units="secs")
        time_require <- (time_elapsed/(s/max))*(1-(s/max))
        td <- lubridate::seconds_to_period(time_require)
        time_require <- sprintf('%02d:%02d:%02d', td@hour, lubridate::minute(td), round(lubridate::second(td),digits=0))

        updatecounter <- 0
        tcltk::setTkProgressBar(pb, s, label=base::paste( round(s/max*100, 0)," % done (",s,"/",max,", Time require: ",time_require,")",sep = ""))
      }
    }
    close(pb)

    ##plot results over all samples
    Total_FDR <- NULL
    Large_Intensity_delta_FDR <- NULL
    for(s in 1:length(samples))
    {
      if(length(results_peak_selection_FDR_all[[s]]$peak_decision_same_without_peak_known) >= 100) ###require at least 100 identified peptides in a sample
      {
        Total_FDR <- append(Total_FDR,results_peak_selection_FDR_all[[s]]$plot_data[1])
        Large_Intensity_delta_FDR <- append(Large_Intensity_delta_FDR,results_peak_selection_FDR_all[[s]]$plot_data[2])
      }else
      {
        print(base::paste(samples[s],": Not enough known peaks for peak selection FDR estimation",sep=""))
        Total_FDR <- append(Total_FDR,NA)
        Large_Intensity_delta_FDR <- append(Large_Intensity_delta_FDR,NA)
      }

    }
    names(Total_FDR) <- samples
    names(Large_Intensity_delta_FDR) <- samples

    if(plot == T)
    {
      # ylim <- c(0,ifelse(max(Total_FDR,na.rm=T)<5,5,max(Total_FDR,na.rm=T)))
      # p <- Barplots(Total_FDR,AvgLine = T,digits_average = 1,Name = samples,xlab = "",ylab="FDR [%]",main = "Peak selection FDR - Total",shownumbers = F,ylim=ylim)
      # graphics::abline(h=5,lty=2,col="red")

      ylim <- c(0,ifelse(max(Large_Intensity_delta_FDR,na.rm=T)<5,5,max(Large_Intensity_delta_FDR,na.rm=T)))
      p <- Barplots(Large_Intensity_delta_FDR,AvgLine = T,digits_average = 1,Name = samples,xlab = "",ylab="FDR [%]",main = base::paste("Peak selection FDR - > 2-fold intensity difference\nBased on n=",num_features," random draws",sep=""),shownumbers = F,ylim=ylim)
      graphics::abline(h=5,lty=2,col="red")

    }

  }else
  {
    print("Warning: The true peak for too few features is known. Skipping peak selection FDR estimation.")
    results_peak_selection_FDR_all <- NA
    Total_FDR <- NA
    Large_Intensity_delta_FDR <- NA
  }


  return(list(results_peak_selection_FDR_all=results_peak_selection_FDR_all,
              Total_FDR=Total_FDR,
              Large_Intensity_delta_FDR=Large_Intensity_delta_FDR))

}
mathiaskalxdorf/IceR documentation built on Aug. 1, 2022, 8:03 a.m.