CAMERA: Collection of annotation related methods for mass spectrometry data

Documented in annotateDiffreport cleanParallel combinexsAnnos findKendrickMasses findNeutralLoss findNeutralLossSpecs getpspectra xsAnnotate

xsAnnotate <- function(xs=NULL, sample=NA, nSlaves=1, polarity=NULL){

 ## sample is:
 ### NA for maxInt-way
 ##  1-nSamp for manual way
 ##  -1 for specDist way (to be implemented)
  if(is.null(xs)) { stop("no argument was given"); }
  else if(!class(xs)=="xcmsSet") stop("xs is no xcmsSet object ");
  
  object  <- new("xsAnnotate");
  
  if(length(sampnames(xs)) > 1 & !nrow(xs@groups) > 0) {  
      # more than one sample
      # checking alignment
      stop ('First argument must be a xcmsSet with group information or contain only one sample.') 
  }

  # check multiple sample selection
  if(length(sample) > 1){
    #example sample <- c(1,2,3,4,5)
  
    if(all(sample <= length(xs@filepaths)) & all(sample > 0)){
      #all sample values are in allowed range
      object@sample <- sample;
    }else{
        #one or more sample values are lower 0 or higher than number of samples (length(xs@filepaths))
        stop("All values in parameter sample must be lower equal the number of samples and greater than 0.\n")
      }
  }else{
    if(is.null(sample) || is.na(sample)) {
      #automatic sample selection, sample = NA
      if(length(xs@filepaths) == 1){
          #If samplesize == 1 than set sample to 1
          object@sample <- 1;
      }else{
          object@sample   <-  as.numeric(NA);
      }
    }else{
      if(sample == -1){
        #Joes Way
        object@sample <-  sample;
      }else if(length(xs@filepaths) < sample | sample < 1) {
        stop("Parameter sample must be lower equal than number of samples and greater than 0.\n")
      }else{       
        object@sample <-  sample;
      }
    }
  }

  object@groupInfo <- getPeaks_selection(xs);
  runParallel <- list()
  runParallel$enable   <-  0;

  if (nSlaves > 1) {
    ## If MPI is available ...
    rmpi = "Rmpi"
    opt.warn <- options("warn")$warn
    options("warn" = -1) 
    if ((Sys.info()["sysname"] != "Windows") && require(rmpi,character.only=TRUE) && !is.null(nSlaves)) {
      if (is.loaded('mpi_initialize')) {
        #test if not already slaves are running!
        if(mpi.comm.size() >0){ 
          warning("There are already intialized mpi slaves on your machine.\nCamera will try to uses them!\n");
          runParallel$enable <-1;
          runParallel$mode <- rmpi;
        }else{
          mpi.spawn.Rslaves(nslaves=nSlaves, needlog=FALSE)
          if(mpi.comm.size() > 1){
            #Slaves have successfull spawned
            runParallel$enable <-1;
            runParallel$mode <- rmpi;
          }else{ warning("Spawning of mpi slaves have failed. CAMERA will run without parallelization.\n");}
        }
      }else {
          #And now??
          warning("DLL mpi_initialize is not loaded. Run single core mode!\n");
      }
    } else {
      #try local sockets using snow package
      snow = "snow"
      if (try(require(snow,character.only=TRUE,quietly=TRUE))) {
        cat("Starting snow cluster with",nSlaves,"local sockets.\n")
        snowclust <- makeCluster(nSlaves, type = "SOCK")
        runParallel$enable <- 1
        runParallel$mode <- snow;
        runParallel$cluster <- snowclust
      }
    }
    options("warn" = opt.warn)
    cat("Run cleanParallel after processing to remove the spawned slave processes!\n")
  }

  if(!is.null(polarity)){
    if(is.na(match.arg(polarity, c("positive","negative")))){
      stop("Parameter polarity is unknown: ", graphMethod,"\n")  
    }else{
      object@polarity <- polarity;
    }
  }
  object@runParallel<-runParallel;

  colnames(object@annoID) <-  c("id","grpID","ruleID","parentID");
  colnames(object@annoGrp)<-  c("id","mass","ips","psgrp");
  colnames(object@isoID)  <-  c("mpeak","isopeak","iso","charge")

  #save xcmsSet in the xsAnnotate object
  object@xcmsSet  <-  xs;
  return(object);
}

setMethod("show", "xsAnnotate", function(object){
  #show main information
  cat("An \"xsAnnotate\" object!\n");
  cat("With",length(object@pspectra),"groups (pseudospectra)\n");
  if(!is.null(object@xcmsSet)){
    cat("With",length(sampnames(object@xcmsSet)),"samples and",nrow(object@groupInfo),"peaks\n");
  }else{
    cat("Include no xcmsSet set\n");
  }
    
  #Show polarity if avaiable
  if(length(object@polarity) > 0){
    cat("Polarity mode is set to: ",object@polarity,"\n");
  }
    
  #Show samples selection
  if(is.null(object@sample)){
    cat("Parameter sample not set\n");
  }else{
    if(is.na(object@sample[1])){
      cat(paste("Using automatic sample selection\n"));
    } else if(all(object@sample > -1)){
      cat("Using sample(s): ",paste(object@sample),"\n");
    } else { 
      cat(paste("Using complete measurement\n"));
    }
  }
  
  #Show isotope information
  if(length(object@isotopes) > 0){
    cnt <- nrow(object@isoID)
    cat("Annotated isotopes:", cnt, "\n");
  }

  #Show annotation information
  if(length(object@derivativeIons) > 0){
    cnt <- length(unique(object@annoID[, 1]));
    cat("Annotated adducts & fragments:", cnt, "\n");
  }

  #Show memory information
  memsize <- object.size(object)
  cat("Memory usage:", signif(memsize/2^20, 3), "MB\n");
  if(!is.null(object@runParallel) && object@runParallel$enable == 1){
    cat("CAMERA runs in parallel mode!\n");
  }
})

###End Constructor###

setGeneric("groupComplete", function(object,...)
  standardGeneric("groupComplete"))

setMethod("groupComplete", "xsAnnotate", function(object, h=NULL,...) {
  
  sample    <- object@sample;
  pspectra  <- list();
  psSamples <- NA;
  
  #generate distance matrix
  nPeaks <- nrow(object@groupInfo)
  
  distMat <- matrix(NA, nrow=nPeaks, ncol=nPeaks)

  # First Part of Score
  # Remove peaks combination which are to far away
  
  rt <- object@xcmsSet@peaks[, c("rtmax","rtmin","rt"),drop=FALSE]
  max.Rt <-  max((rt[, 1]-rt[, 3]), (rt[, 3]-rt[, 2]))
  distRT <- dist(object@groupInfo[,"rt"], method="manhattan")
  if(is.null(h)){
    result <- cutree(hclust(distRT), h=max.Rt/2)    
  }else if(is.numeric(h)){
    result <- cutree(hclust(distRT), h=h)
  } else {
    stop("h must be numeric!\n")
  }
  

  for(i in unique(result)){
    index <- which(result == i)
    pspectra[[length(pspectra)+1]] <- index
  }
  psSamples <- rep(sample, length(pspectra))

  object@pspectra  <- pspectra;
  object@psSamples <- psSamples;
  cat("Created", length(object@pspectra), "pseudospectra.\n")
  return(object)
})

setGeneric("groupDen", function(object, bw=5, ...)
  standardGeneric("groupDen"))

setMethod("groupDen", "xsAnnotate", function(object, bw=5, ...) {
  
  sample    <- object@sample;
  pspectra  <- list();
  psSamples <- NA;

  #generate distance matrix
  nPeaks <- nrow(object@groupInfo)
  rt <- object@groupInfo[ ,"rt"]  
  
  den <- density(rt, bw, from = min(rt)-3*bw, to = max(rt)+3*bw)
  maxden <- max(den$y)
  deny <- den$y
  snum <- 0
  while (deny[maxy <- which.max(deny)] > 0 ) {
    grange <- xcms:::descendMin(deny, maxy)
    deny[grange[1]:grange[2]] <- 0
    gidx <- which(rt >= den$x[grange[1]] & rt <= den$x[grange[2]])
    pspectra[[length(pspectra)+1]] <- gidx
    snum <- snum + length(gidx)
  }
  psSamples <- rep(sample, length(pspectra))
  
  object@pspectra  <- pspectra;
  object@psSamples <- psSamples;
  cat("Created", length(object@pspectra), "pseudospectra.\n")
  return(object)
})

###xsAnnotate generic Methods###
setGeneric("groupFWHM", function(object, sigma=6, perfwhm=0.6, intval="maxo") 
  standardGeneric("groupFWHM"))

setMethod("groupFWHM", "xsAnnotate", function(object, sigma=6, perfwhm=0.6, intval="maxo") {
  # grouping after retentiontime 
  # sigma - number of standard deviation arround the mean (6 = 2 x 3 left and right)
  # perfwhm - 0.3;

  if (!class(object) == "xsAnnotate") {
    stop ("no xsAnnotate object")
  }

  if (!sum(intval == c("into","intb","maxo"))){
       stop("unknown intensity value!")
  }

  sample    <- object@sample;
  pspectra  <- list();
  psSamples <- NA;

  cat("Start grouping after retention time.\n")

  if(object@groupInfo[1, "rt"] == -1) {
     # Like FTICR Data
     warning("Warning: no retention times avaiable. Do nothing\n");
     return(invisible(object));
  }else{
    if(is.na(sample[1]) || length(object@xcmsSet@filepaths) > 1) {
      # grouped peaktable within automatic selection or sub selection
      if(is.na(sample[1])){
        index <- 1:length(object@xcmsSet@filepaths);
      }else{
        index <- sample;
      }
      gvals    <- groupval(object@xcmsSet)[,index,drop=FALSE];
      peakmat  <- object@xcmsSet@peaks;
      groupmat <- groups(object@xcmsSet);

      #calculate highest peaks
      maxo      <- as.numeric(apply(gvals, 1, function(x, peakmat){
        val <- na.omit(peakmat[x, intval]);
        if(length(val) == 0){
          return(NA);
        }else{
          return(max(val))
        }
      }, peakmat));
      
      maxo[which(is.na(maxo))] <- -1;
      maxo      <- cbind(1:length(maxo),maxo);
      
      #highest peak index 
      int.max   <- as.numeric(apply(gvals, 1, function(x, peakmat){which.max(peakmat[x, intval])}, peakmat));

      peakrange <- matrix(apply(gvals, 1, function(x, peakmat) { 
        val <- peakmat[x, intval];
        if(length(na.omit(val)) == 0){
          return(c(0,1));
        } else {
          return(peakmat[x[which.max(val)], c("rtmin", "rtmax")]);
        }
      }, peakmat), ncol=2, byrow=TRUE); 

      colnames(peakrange) <- c("rtmin", "rtmax")

      while(length(maxo) > 0){
          iint   <- which.max(maxo[,2]);
          rtmed  <- groupmat[iint, "rtmed"]; #highest peak in whole spectra
          rt.min <- peakrange[iint, "rtmin"];
          rt.max <- peakrange[iint, "rtmax"]; #begin and end of the highest peak
          hwhm   <- ((rt.max-rt.min) / sigma * 2.35 * perfwhm) / 2; #fwhm of the highest peak
          #all other peaks whose retensiontimes are in the fwhm of the highest peak
          irt    <- which(groupmat[, 'rtmed'] > (rtmed-hwhm) & groupmat[, 'rtmed'] < (rtmed + hwhm)) 
          if(length(irt) > 0){
              #if peaks are found
              idx <- maxo[irt,1];
              pspectra[[length(pspectra)+1]] <- idx; #create groups
              psSamples[length(pspectra)]  <- index[int.max[maxo[iint,1]]] # saves the sample of the peak which is in charge for this pspectrum
              maxo <- maxo[-irt, ,drop=FALSE]; #set itensities of peaks to NA, due to not to be found in the next cycle
              groupmat   <- groupmat[-irt, ,drop=FALSE];
              peakrange  <- peakrange[-irt, ,drop=FALSE];
          }else{              
              idx <- maxo[iint,1];
              cat("Warning: Feature ",idx," looks odd for at least one peak. Please check afterwards.\n");
              pspectra[[length(pspectra)+1]] <- idx; #create groups
              psSamples[length(pspectra)]  <- index[int.max[maxo[iint,1]]] # saves the sample of the peak which is in charge for this pspectrum
              maxo <- maxo[-iint, ,drop=FALSE]; #set itensities of peaks to NA, due to not to be found in the next cycle
              groupmat   <- groupmat[-iint, ,drop=FALSE];
              peakrange  <- peakrange[-iint, ,drop=FALSE];
          }
      }
    }else{
      #One sample experiment
      peakmat <- object@xcmsSet@peaks;
      maxo    <- peakmat[, intval]; #max intensities of all peaks
      maxo    <- cbind(1:length(maxo),maxo);

      while(length(maxo)> 0){
          iint   <- which.max(maxo[,2]);
          rtmed  <- peakmat[iint, "rt"]; #highest peak in whole spectra
          rt.min <- peakmat[iint, "rtmin"];
          rt.max <- peakmat[iint, "rtmax"]; #begin and end of the highest peak
          hwhm   <- ((rt.max - rt.min) / sigma * 2.35 * perfwhm) / 2; #fwhm of the highest peak
          #all other peaks whose retensiontimes are in the fwhm of the highest peak
          irt    <- which(peakmat[, 'rt'] > (rtmed - hwhm) & peakmat[, 'rt'] < (rtmed + hwhm)) 
          if(length(irt)>0){
              #if peaks are found
              idx <- maxo[irt,1];
              pspectra[[length(pspectra)+1]] <- idx; #create groups
              maxo <- maxo[-irt, ,drop=FALSE]; #set itensities of peaks to NA, due to not to be found in the next cycle
              peakmat <- peakmat[-irt, ,drop=FALSE];
          }else{
              idx <- maxo[iint,1];
              cat("Warning: Feature ",idx," looks odd for at least one peak. Please check afterwards.\n");
              pspectra[[length(pspectra)+1]] <- idx; #create groups
              maxo       <- maxo[-iint, ,drop=FALSE]; #set itensities of peaks to NA, due to not to be found in the next cycle
              peakmat  <- peakmat[-iint, ,drop=FALSE];
          }
      }
      psSamples <- rep(sample, length(pspectra))
    }

    object@pspectra  <- pspectra;
    object@psSamples <- psSamples;
    cat("Created", length(object@pspectra), "pseudospectra.\n")
  }
  return(invisible(object)); #return object
})


setGeneric("groupCorr",function(object, cor_eic_th=0.75, pval=0.05, graphMethod="hcs", 
                                calcIso = FALSE, calcCiS = TRUE, calcCaS = FALSE, psg_list=NULL, 
                                xraw=NULL, cor_exp_th=0.75, intval="into", ...) standardGeneric("groupCorr"));

setMethod("groupCorr","xsAnnotate", function(object, cor_eic_th=0.75, pval=0.05, 
                                             graphMethod="hcs", calcIso = FALSE, calcCiS = TRUE, 
                                             calcCaS = FALSE, psg_list=NULL, xraw=NULL,
                                             cor_exp_th=0.75, intval="into") {
  if (! (calcCiS || calcCaS)) {
    stop ("At least one of calcCiS or calcCaS has to be TRUE.\n");
  }
  
  if (!is.numeric(cor_eic_th) || cor_eic_th < 0 || cor_eic_th > 1){
    stop ("Parameter cor_eic_th must be numeric and between 0 and 1.\n");
  }

  if (!is.numeric(cor_exp_th) || cor_exp_th < 0 || cor_exp_th > 1){
    stop ("Parameter cor_exp_th must be numeric and between 0 and 1.\n");
  }
  
  if (!is.numeric(pval) || pval < 0 || pval > 1){
    stop ("Parameter pval must be numeric and between 0 and 1.\n");
  }
  
  
  checkMethod <- match.arg(graphMethod, c("hcs","lpc"))
  if (is.na(checkMethod)){
    stop("Parameter graphMethod is unknown: ", graphMethod,"\n")
  }
  rm(checkMethod)
  
  if (!is.logical(calcIso)) {
    stop ("Parameter calcIso must be logical.\n");
  }
  
  if (!is.logical(calcCiS)) {
    stop ("Parameter calcCiS must be logical.\n");
  }
  
  if (!is.logical(calcCaS)) {
    stop ("Parameter calcCaS must be logical.\n");
  }
  
  if (!is.null(psg_list) && (!is.numeric(psg_list) || any(psg_list > length(object@pspectra)) || any(psg_list < 0)) ) {
    stop ("If parameter psg_list is used, it must be numeric and contains only indices lower or equal maximum number
          of pseudospectra.\n");
  }
  
  if (!is.null(xraw) &&  !class(xraw) == "xcmsRaw") {
    stop ("Parameter xraw must be null or an xcmsRaw object\n");
  }
  
  npspectra <- length(object@pspectra);

  cat("Start grouping after correlation.\n")
  #Data is not preprocessed with groupFWHM 
  if(npspectra < 1){
    cat("Data was not preprocessed with groupFWHM, creating one pseudospectrum with all peaks.\n")
    #Group all peaks into one group
    npspectra <- 1;
    object@pspectra[[1]] <- seq(1:nrow(object@groupInfo));
    if(is.na(object@sample[1])){
      object@psSamples <- rep(1,nrow(object@groupInfo)); ##TODO: Change if sample=NA or sample=number
    }else{
      object@psSamples <- rep(object@sample,nrow(object@groupInfo));
    }
  }

  #save number of pspectra before groupCorr
  cnt <- length(object@pspectra);
  res <- list();

  # Check LC information and calcCorr was selected
  if(calcCiS && object@xcmsSet@peaks[1,"rt"] != -1){
    if(!is.null(xraw)) {
      #Use provided xcmsRaw
      maxscans <- length(xraw@scantime)
      scantimes<-list();
      scantimes[[1]] <- xraw@scantime
      pdata <- object@groupInfo
      EIC <- CAMERA:::getEICs(xraw, pdata, maxscans)
      
      res[[1]] <- calcCiS(object, EIC=EIC, corval=cor_eic_th, 
                          pval=pval, psg_list=psg_list);
    }else if(is.na(object@sample[1])){
      #Autoselect sample path for EIC correlation    
      index <- rep(0, nrow(object@groupInfo));
      
      for(i in 1:npspectra){
        index[object@pspectra[[i]]] <- object@psSamples[[i]];
      }

      #Generate EIC data
      tmp <- getAllPeakEICs(object, index=index);
      EIC <- tmp$EIC
      scantimes <- tmp$scantimes
      rm(tmp);

      res[[1]] <- calcCiS(object, EIC=EIC, corval=cor_eic_th, pval=pval, psg_list=psg_list);

    } else {
      #Calculate EIC-Correlation for selected sample(s)

      for(i in object@sample){
        index <- rep(i, nrow(object@groupInfo));
        tmp <- getAllPeakEICs(object, index=index);
        EIC <- tmp$EIC
        scantimes <- tmp$scantimes
        rm(tmp);
        #new calcCL function with rcorr from Hmisc
        if(length(res) == 0){
          res[[1]] <- calcCiS(object, EIC=EIC, corval=cor_eic_th, pval=pval, psg_list=psg_list);
        }else{
          res[[1]] <- combineCalc(res[[1]],calcCiS(object, EIC=EIC, corval=cor_eic_th, pval=pval, psg_list=psg_list),method="sum")
        }
      }
      res[[1]][,3] <- res[[1]][,3] / length(object@sample)
    }
  } else if(calcCiS && object@xcmsSet@peaks[1,"rt"] == -1){
    cat("Object contains no retention time data!\n");
  }
  
  # Check if sample size > 3 and calcCaS was selected
  if( length(object@xcmsSet@filepaths) > 3 && calcCaS){
    res[[length(res)+1]] <- calcCaS(object, corval=cor_exp_th, pval=pval, intval=intval);
  }else if(length(object@xcmsSet@filepaths) <= 3 && calcCaS){
    cat("Object has to contain more than 3 samples to calculate correlation accros samples!\n");
  }
  
  #If object has isotope information and calcIso was selected
  if (calcIso) {
    if (nrow(object@isoID) > 0) {
      res[[length(res)+1]] <- calcIsotopes(object);
    } else {
      cat("Object contains no isotope or isotope annotation!\n");
    }
  }

  #Check if we have at least 2 result matrixes
  if(length(res) > 2){
    #combine the first two to create the result Table
    resMat <- combineCalc(res[[1]], res[[2]], method="sum");
    for( i in 3:length(res)){
      resMat <- combineCalc(resMat, res[[i]], method="sum");
    }         
  }else if(length(res) == 2){
    #combine one time
    resMat <- combineCalc(res[[1]], res[[2]], method="sum")
  } else {
    #Only one matrix
    resMat <- res[[1]];
  }

  #if(nrow(resMat) < 1){
    #Matrix contains no edge
    #Do nothing!
   # cat("No group was seperated.\n")
  #  return(invisible(object));
  #}

  #Perform graph seperation to seperate co-eluting pseudospectra
  object <- calcPC(object, method=graphMethod, ajc=resMat, psg_list=psg_list);                                   

  #Create pc groups based on correlation results
  cat("xsAnnotate has now", length(object@pspectra), "groups, instead of", cnt, "\n"); 

  return(invisible(object));
})


setGeneric("findIsotopes", function(object, maxcharge=3, maxiso=4, ppm=5, mzabs=0.01, 
                                    intval=c("maxo","into","intb"),minfrac=0.5, 
                                    isotopeMatrix=NULL,filter=TRUE) standardGeneric("findIsotopes"));

setMethod("findIsotopes", "xsAnnotate", 
  function(object, maxcharge=3, maxiso=4, ppm=5, mzabs=0.01, 
                   intval=c("maxo","into","intb"), minfrac=0.5,  isotopeMatrix=NULL,
           filter=TRUE){
  
  #searches in every pseudospectrum after mass differences, 
  #which matches isotope distances
  
  ####Test arguments####
  #test maxcharge  
  if(!is.wholenumber(maxcharge) || maxcharge < 1){
    stop("Invalid argument 'maxcharge'. Must be integer and > 0.\n")
  }
  
  #test maxiso
  if(!is.wholenumber(maxiso) || maxiso < 1){
    stop("Invalid argument 'maxiso'. Must be integer and > 0.\n")
  }
  
  #test ppm
  if(!is.numeric(ppm) || ppm < 0){
    stop("Invalid argument 'ppm'. Must be numeric and not negative.\n")
  }
  
  #test mzabs
  if(!is.numeric(mzabs) || mzabs < 0){
    stop("Invalid argument 'mzabs'. Must be numeric and not negative.\n")
  }
  
  #test intval
  intval <- match.arg(intval)
  
  #test minfrac
  if(!is.numeric(minfrac) || minfrac < 0 || minfrac > 1){
    stop("Invalid argument 'minfrac'. Must be numeric and between 0 and 1.\n")
  }
  
  #test isotopeMatrix
  if(!is.null(isotopeMatrix)){
    if(!is.matrix(isotopeMatrix) || ncol(isotopeMatrix) != 4 || nrow(isotopeMatrix) < 1
       || !is.numeric(isotopeMatrix)){
      stop("Invalid argument 'isotopeMatrix'. Must be four column numeric matrix.\n")
    } else {
      colnames(isotopeMatrix) <- c("mzmin", "mzmax", "intmin", "intmax")
    }
  }else if(maxiso > 8){
     stop("Invalid argument 'maxiso'. Must be lower 9 or provide your own isotopeMatrix.\n")
  }else{
    isotopeMatrix <- calcIsotopeMatrix(maxiso=maxiso)
  }
  ####End Test arguments####
  
  npeaks.global <- 0; #Counter for % bar
  npspectra <- length(object@pspectra);

  # scaling
  devppm <- ppm / 1000000;
  filter <- filter;
  #generate parameter list
  params <- list(maxiso=maxiso, maxcharge=maxcharge, devppm=devppm, mzabs=mzabs, IM=isotopeMatrix, minfrac=minfrac, filter=filter)
  
  #Check if object have been preprocessed with groupFWHM
  if(npspectra < 1) {
    cat("xsAnnotate contains no pseudospectra. Regroup all peaks into one!\n")
    npspectra <- 1;
    object@pspectra[[1]] <- seq(1:nrow(object@groupInfo));
    object@psSamples  <- 1;
  }
  
  #number of peaks in pseudospectra
  ncl <- sum(sapply(object@pspectra, length));

  # get mz,rt and intensity values from peaktable
  if(nrow(groups(object@xcmsSet)) > 0){
    ##multiple sample or grouped single sample
    if(is.na(object@sample[1])){
      index <- 1:length(object@xcmsSet@filepaths);
    }else{
      index <- object@sample;
    }
    cat("Generating peak matrix!\n");
    mint     <- groupval(object@xcmsSet,value=intval)[,index,drop=FALSE];
    imz <- object@groupInfo[, "mz", drop=FALSE];
    irt <- object@groupInfo[, "rt", drop=FALSE];
  }else{
    ##one sample case
    cat("Generating peak matrix!\n");
    imz  <- object@groupInfo[, "mz", drop=FALSE];
    irt  <- object@groupInfo[, "rt", drop=FALSE];
    mint <- object@groupInfo[, intval, drop=FALSE];      
  }
  
  isotope   <- vector("list", length(imz));
    
  isomatrix <- matrix(ncol=5, nrow=0);
  colnames(isomatrix) <- c("mpeak", "isopeak", "iso", "charge", "intrinsic")


  cat("Run isotope peak annotation\n % finished: ");
  lp <- -1;

  #look for isotopes in every pseudospectra
  for(i in seq(along = object@pspectra)){
    #get peak indizes for i-th pseudospectrum
    ipeak <- object@pspectra[[i]];

    #Ouput counter
    percentOutput(npeaks.global, length(ipeak), ncl, lp)
    
    #Pseudospectrum has more than one peak
    if(length(ipeak) > 1){
      #peak mass and intensity for pseudospectrum
      mz  <- imz[ipeak];
      int <- mint[ipeak, , drop=FALSE];
      isomatrix <-  findIsotopesPspec(isomatrix, mz, ipeak, int, params)              
    }
  }

  #clean isotopes
  if(is.null(nrow(isomatrix))) {
    isomatrix = matrix(isomatrix, byrow=F, ncol=length(isomatrix)) 
  }
  
  #check if every isotope has only one annotation
  if(length(idx.duplicated <- which(duplicated(isomatrix[, 2]))) > 0){
    peak.idx <- unique(isomatrix[idx.duplicated, 2]);
    for( i in 1:length(peak.idx)){
      #peak.idx has two or more annotated charge
      #select the charge with the higher cardinality
      peak <- peak.idx[i];
      peak.mono.idx <- which(isomatrix[,2] == peak)
      if(length(peak.mono.idx) < 2){
        #peak has already been deleted
        next;
      }
      peak.mono <- isomatrix[peak.mono.idx,1]
      #which charges we have
      charges.list   <- isomatrix[peak.mono.idx, 4];
      tmp <- cbind(peak.mono,charges.list);
      charges.length <- apply(tmp,1, function(x,isomatrix) { 
        length(which(isomatrix[, 1] == x[1] & isomatrix[,4] == x[2])) }, 
                              isomatrix);
      idx <- which(charges.length == max(charges.length));
      if(length(idx) == 1){
        #max is unique
        isomatrix <- isomatrix[-which(isomatrix[, 1] %in% peak.mono[-idx] & isomatrix[, 4] %in% charges.list[-idx]),, drop=FALSE]
      }else{
        #select this one, which lower charge
        idx <- which.min(charges.list[idx]);
        isomatrix <- isomatrix[-which(isomatrix[, 1] %in% peak.mono[-idx] & isomatrix[, 4] %in% charges.list[-idx]),, drop=FALSE]
      }
    }
  }


  #check if every isotope in one isotope grp, have the same charge
  if(length(idx.duplicated <- which(duplicated(paste(isomatrix[, 1], isomatrix[, 3])))) > 0){
    #at least one pair of peakindex and number of isotopic peak is identical
    peak.idx <- unique(isomatrix[idx.duplicated,1]);
    for( i in 1:length(peak.idx)){
      #peak.idx has two or more annotated charge
      #select the charge with the higher cardinality
      peak <- peak.idx[i];
      #which charges we have
      charges.list   <- unique(isomatrix[which(isomatrix[, 1] == peak), 4]);
      #how many isotopes have been found, which this charges
      charges.length <- sapply(charges.list, function(x,isomatrix,peak) { length(which(isomatrix[, 1] == peak & isomatrix[, 4] == x)) },isomatrix,peak);
      #select the charge which the highest cardinality
      idx <- which(charges.length == max(charges.length));
      if(length(idx) == 1){
        #max is unique
        isomatrix <- isomatrix[-which(isomatrix[, 1] == peak & isomatrix[, 4] %in% charges.list[-idx]),, drop=FALSE]
      }else{
        #select this one, which lower charge
        idx <- which.min(charges.list[idx]);
        isomatrix <- isomatrix[-which(isomatrix[, 1] == peak & isomatrix[, 4] %in% charges.list[-idx]),, drop=FALSE]
      }
    }
  }

  #Combine isotope cluster, if they overlap
  index2remove <- c();

  if(length(idx.duplicated <- which(isomatrix[, 1] %in% isomatrix[, 2]))>0){
    for(i in 1:length(idx.duplicated)){
      index <-  which(isomatrix[, 2] == isomatrix[idx.duplicated[i], 1])
      index2 <- sapply(index, function(x, isomatrix) which(isomatrix[, 1] == isomatrix[x, 1] & isomatrix[,3] == 1),isomatrix)
      if(length(index2) == 0){
        index2remove <- c(index2remove,idx.duplicated[i])
      }
      max.index <- which.max(isomatrix[index,4]);
      isomatrix[idx.duplicated[i], 1] <- isomatrix[index[max.index], 1];
      isomatrix[idx.duplicated[i], 3] <- isomatrix[index[max.index], 3]+1;
    }
  }

  if(length(index <- which(isomatrix[,"iso"] > maxiso)) > 0){
    index2remove <- c(index2remove, index)
  }
  
  if(length(index2remove) > 0){
    isomatrix <- isomatrix[-index2remove,, drop=FALSE];
  }

  isomatrix <- isomatrix[order(isomatrix[,1]),,drop=FALSE]
  #Create isotope matrix within object
  object@isoID <- matrix(nrow=0, ncol=4);
  colnames(object@isoID)  <-  c("mpeak", "isopeak", "iso", "charge");
  
  #Add isomatrix to object
  object@isoID <- rbind(object@isoID, isomatrix[, 1:4]);
  
  # counter for isotope groups
  globalcnt <- 0;
  oldnum    <- 0;
  
  if(nrow(isomatrix) > 0){
    for( i in 1:nrow(isomatrix)){
      if(!isomatrix[i, 1] == oldnum){
          globalcnt <- globalcnt+1; 
          isotope[[isomatrix[i, 1]]] <- list(y=globalcnt, iso=0, charge=isomatrix[i, 4], val=isomatrix[i, 5]);
          oldnum <- isomatrix[i, 1];
      };
      isotope[[isomatrix[i,2]]] <- list(y=globalcnt,iso=isomatrix[i,3],charge=isomatrix[i,4],val=isomatrix[i,5]);
    }
  }
  cnt<-nrow(object@isoID);
  cat("\nFound isotopes:",cnt,"\n");
  object@isotopes <- isotope;
  return(object);
})

setGeneric("findAdducts", function(object, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, 
                                   rules=NULL, max_peaks=100, psg_list=NULL, intval="maxo") standardGeneric("findAdducts"));
setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, 
                                                rules=NULL, max_peaks=100, psg_list=NULL, intval="maxo"){
  multFragments=FALSE;
  # Scaling ppm factor
  devppm <- ppm / 1000000;
  # counter for % bar
  npeaks.global <- 0;

  # get mz values from peaklist
  imz    <- object@groupInfo[, "mz"];

  #number of pseudo-spectra
  npspectra <- length(object@pspectra);

  #If groupCorr or groupFHWM have not been invoke, select all peaks in one sample
  if(npspectra < 1){ 
    npspectra <- 1;
    object@pspectra[[1]] <- seq(1:nrow(object@groupInfo)); 
  }

  if(object@sample[1] == 1 & length(sampnames(object@xcmsSet)) == 1){
    ##one sample case
    mint <- object@groupInfo[, intval];
  }else{
    ##multiple sample
    if(is.na(object@sample[1])){
      index <- 1:length(object@xcmsSet@filepaths);
    }else{
      index <- object@sample;
    }
    
    cat("Generating peak matrix for peak annotation!\n");
    mint     <- groupval(object@xcmsSet,value=intval)[, index, drop=FALSE];
    peakmat  <- object@xcmsSet@peaks;
    groupmat <- groups(object@xcmsSet);
    
    imz <- groupmat[, "mzmed"];
    irt <- groupmat[, "rtmed"];
    int.val <- c();
    nsample <- length(object@sample);
  }


  # isotopes
  isotopes  <- object@isotopes;
  # adductlist
  derivativeIons <- vector("list", length(imz));
  
  # other variables
  oidscore <- c();
  index    <- c();
  annoID   <- matrix(ncol=4, nrow=0)
  annoGrp  <- matrix(ncol=4, nrow=0)
  colnames(annoID)  <-  colnames(object@annoID)
  colnames(annoGrp) <-  colnames(object@annoGrp)

  ##Examine polarity and rule set
  if(!(object@polarity == "")){
    cat(paste("Polarity is set in xsAnnotate:", object@polarity, "\n"));
    if(is.null(rules)){
      if(!is.null(object@ruleset)){
        rules <- object@ruleset;
      }else{ 
        cat("Ruleset could not read from object! Recalculate\n");
        rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2,
                           polarity=object@polarity, 
                           lib.loc= .libPaths(), multFragments=multFragments);
        object@ruleset <- rules;
      }
    }else{ 
      object@ruleset <- rules;
      cat("Found and use user-defined ruleset!");
    }
  }else{
    if(!is.null(polarity)){
      if(polarity %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, 
                              nh=2, polarity=polarity, lib.loc= .libPaths(),
                             multFragments=multFragments);
        }else{ cat("Found and use user-defined ruleset!");}
          object@polarity <- polarity;
      }else stop("polarity mode unknown, please choose between positive and negative.")
    }else if(length(object@xcmsSet@polarity) > 0){
      index <- which(sampclass(object@xcmsSet) == object@category)[1] + object@sample-1;
      if(object@xcmsSet@polarity[index] %in% c("positive","negative")){
        if(is.null(rules)){
          rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, 
                             nh=2, polarity=object@xcmsSet@polarity[index], 
                             lib.loc= .libPaths(), multFragments=multFragments);
        }else{ cat("Found and use user-defined ruleset!");}
        object@polarity <- polarity;
      }else stop("polarity mode in xcmsSet unknown, please define variable polarity.")
  }else stop("polarity mode could not be estimated from the xcmsSet, please define variable polarity!")
    #save ruleset
    object@ruleset <- rules;
  }

  ##Run as single or parallel mode
  runParallel <- 0;

  if(object@runParallel$enable == 1){
    if(!(is.null(object@runParallel$cluster)) || mpi.comm.size() > 0 ){
      runParallel <- 1;
    }else{
      warning("CAMERA runs in parallel mode, but no slaves are spawned!\nRun in single core mode!\n");
      runParallel <- 0;
    }
  }else{
    runParallel <- 0;
  }

  if("quasi" %in% colnames(rules)){
  #backup for old rule sets
   quasimolion <- which(rules[, "quasi"]== 1) 
  }else{
   quasimolion <- which(rules[, "mandatory"]== 1)
  }
  
  #Remove recognized isotopes from annotation m/z vector
  for(x in seq(along = isotopes)){
    if(!is.null(isotopes[[x]])){
      if(isotopes[[x]]$iso != 0){
        imz[x] <- NA;
      }
    }
  }
  
  #counter for % bar
  npeaks    <- 0; 
  massgrp   <- 0;
  ncl <- sum(sapply(object@pspectra, length));  

  if (runParallel == 1) { ## ... we run in parallel mode
    if(is.null(psg_list)){
      cat('\nCalculating possible adducts in',npspectra,'Groups... \n');
      lp <- -1;
      pspectra_list <- 1:npspectra;
    }else{
      cat('\nCalculating possible adducts in',length(psg_list),'Groups... \n'); 
      lp <- -1;
      pspectra_list <- psg_list;
    }
    
    argList <- list();
    
    cnt_peak <- 0;
    if(is.null(max_peaks)){
      max_peaks=100;
    }
    paramsGlobal <- list();
    if("typ" %in% colnames(rules)){
      rules.idx <- which(rules[, "typ"]== "A")
      parent <- TRUE;
    }else{
      #backup for old rule sets
      rules.idx <- 1:nrow(rules);
      parent <- FALSE;
    }
    
    #Add params to env
    paramsGlobal <- list()
    paramsGlobal$pspectra <- object@pspectra;
    paramsGlobal$imz <- imz;
    paramsGlobal$rules <- rules;
    paramsGlobal$mzabs <- mzabs;
    paramsGlobal$devppm <- devppm;
    paramsGlobal$isotopes <- isotopes;
    paramsGlobal$quasimolion <- quasimolion;
    paramsGlobal$parent <- parent;
    paramsGlobal$rules.idx <- rules.idx;
    #create params
    #paramsGlobal <- list2env(params)
    
    params <- list();
    
    for(j in 1:length(pspectra_list)){
      i <- pspectra_list[j];
      params$i[[length(params$i)+1]] <- i;
      cnt_peak <- cnt_peak+length(object@pspectra[[i]]);
      if(cnt_peak > max_peaks || j == length(pspectra_list)){
        argList[[length(argList)+1]] <- params
        cnt_peak <- 0;
        params <- list();
      }
    }
    
    #Some informationen for the user
    cat("Parallel mode: There are",length(argList), "tasks.\n")
    
    if(is.null(object@runParallel$cluster)){
      #Use MPI
      result <- xcmsPapply(argList, annotateGrpMPI2, paramsGlobal)
    }else{
      #For snow
      result <- xcms:::xcmsClusterApply(cl=object@runParallel$cluster, 
                                        x=argList, fun=annotateGrpMPI, 
                                        msgfun=msgfun.snowParallel,
                                        paramsGlobal)
    }

    for(ii in 1:length(result)){
      if(length(result[[ii]]) == 0){
        next;
      }
      for(iii in 1:length(result[[ii]])){
        hypothese <- result[[ii]][[iii]];
        if(is.null(hypothese)){
          next;
        }
        charge <- 0;
        old_massgrp <- 0;
        index <- argList[[ii]]$i[[iii]];
        ipeak <- object@pspectra[[index]];
        for(hyp in 1:nrow(hypothese)){
          peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
          if(old_massgrp != hypothese[hyp,"massgrp"]) {
            massgrp <- massgrp+1;
            old_massgrp <- hypothese[hyp,"massgrp"];
            annoGrp <- rbind(annoGrp,c(massgrp,hypothese[hyp,"mass"],
                                       sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp),"score"]),i) ) 
          }
          
          if(parent){
            annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID","parent")]))  
          }else{
            annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp, c("ruleID")],NA))
          }
          
        }
      }
    }

    derivativeIons <- getderivativeIons(annoID,annoGrp,rules,length(imz));
    cat("\n");
    object@derivativeIons <- derivativeIons;
    object@annoID  <- annoID;
    object@annoGrp <- annoGrp;
    return(object)
  } else {
    ##Single Core Mode
    if(is.null(psg_list)){
      cat('\nCalculating possible adducts in',npspectra,'Groups... \n % finished: '); 
      lp <- -1;
      pspectra_list <- 1:npspectra;
    }else{
      cat('\nCalculating possible adducts in',length(psg_list),'Groups... \n % finished: '); 
      lp <- -1;
      pspectra_list <- psg_list;
      sum_peaks <- sum(sapply(object@pspectra[psg_list],length));
    }
    
    if("typ" %in% colnames(rules)){
      rules.idx <- which(rules[, "typ"]== "A")
      parent <- TRUE;
    }else{
      #backup for old rule sets
      rules.idx <- 1:nrow(rules);
      parent <- FALSE;
    }
    
    for(j in seq(along = pspectra_list)){
      i <- pspectra_list[j];
      
      #peak index for those in pseudospectrum i
      ipeak <- object@pspectra[[i]];

      #percent output
      npeaks.global <- npeaks.global + length(ipeak);
      perc   <- round((npeaks.global) / ncl * 100)
      perc   <- perc %/% 10 * 10;
      
      if (perc != lp && perc != 0) { 
        cat(perc,' '); 
        lp <- perc;
      }
      if (.Platform$OS.type == "windows"){ 
        flush.console();
      }
      #end percent output

      #check if the pspec contains more than one peak 
      if(length(ipeak) > 1){
        
        hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx=rules.idx);
        #save results
        if(is.null(hypothese)){
          next;
        }
        charge <- 0;
        old_massgrp <- 0;
        
        #combine annotation hypotheses to annotation groups for one compound mass
        for(hyp in 1:nrow(hypothese)){
          peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]);
          if(old_massgrp != hypothese[hyp, "massgrp"]) {
            massgrp <- massgrp + 1;
            old_massgrp <- hypothese[hyp, "massgrp"];
            annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"], 
                                         sum(hypothese[ which(hypothese[, "massgrp"] == old_massgrp), "score"]), i) ) 
          }
          if(parent){
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], ipeak[hypothese[hyp, "parent"]]))
          }else{
            annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], NA))
          }
        }
      }
    }

    derivativeIons <- getderivativeIons(annoID, annoGrp, rules, length(imz));

    cat("\n");

    object@derivativeIons <- derivativeIons;
    object@annoID  <- annoID;
    object@annoGrp <- annoGrp;
    return(object)
  }
})

annotateDiffreport <- function(object, sample=NA, nSlaves=1, sigma=6, perfwhm=0.6,
  cor_eic_th=0.75, cor_exp_th=0.75, graphMethod="hcs", pval=0.05, calcCiS=TRUE,
  calcIso=FALSE, calcCaS=FALSE, maxcharge=3, maxiso=4, minfrac=0.5,
  ppm=5, mzabs=0.015, quick=FALSE, psg_list=NULL, rules=NULL,
  polarity="positive", multiplier=3, max_peaks=100, intval="into",
  pval_th = NULL, fc_th = NULL, sortpval=TRUE, ...) {

  if (!class(object)=="xcmsSet") stop ("no xcmsSet object");
  
  #use diffreport from xcms
  diffrep <- diffreport(object, sortpval=FALSE, ...);
  
  if(quick){
    #Quick run, no groupCorr and findAdducts
    xa <- xsAnnotate(object, sample=sample, nSlaves=nSlaves);
    xa <- groupFWHM(xa, perfwhm=perfwhm, sigma=sigma);
    xa <- findIsotopes(xa, maxcharge=maxcharge, maxiso=maxiso, ppm=ppm, mzabs=mzabs)
    xa.result <- getPeaklist(xa);
  }else{
    xa <- xsAnnotate(object, sample=sample, nSlaves=nSlaves);
    xa <- groupFWHM(xa, perfwhm=perfwhm, sigma=sigma);
    xa <- findIsotopes(xa, maxcharge=maxcharge, maxiso=maxiso, ppm=ppm, mzabs=mzabs)
    if(is.null(psg_list) & is.null(pval_th) & is.null(fc_th)){
      #no restriction for calculation ps-spectra
      #all groups will be calculated, psg_list=NULL
    }else{
      #One value was set
      #generate psg_list
      peaklist <- getPeaklist(xa);
      #Do we have restriction for psg_list?
      if(is.null(psg_list)){
        psg_list <- 1:length(xa@pspectra);
      }
      
      if(!is.null(pval_th)){
        #Find groups, which include features with p-val < pval_th
        index <- which(diffrep[, "pvalue"] < pval_th);
        index.grp <- unique(peaklist[index, "pcgroup"]);
        if(length(index.grp) > 0){
          psg_list  <- psg_list[which(psg_list %in% index.grp)]
        } else {
          cat("No groups found, which satisfy your conditions!\n")
          result <- cbind(diffrep, peaklist[, c("isotopes","adduct","pcgroup")])
          return(result);
        }
      }
      
      if(!is.null(fc_th)){
        #Find groups, which include features with fc > fc_th
        index <- which(diffrep[, "fold"] > fc_th);
        index.grp <- unique(peaklist[index, "pcgroup"]);
        if(length(index.grp) > 0){
          psg_list  <- psg_list[which(psg_list %in% index.grp)]
        } else {
          cat("No groups found, which satisfy your conditions!\n")
          result <- cbind(diffrep, peaklist[, c("isotopes","adduct","pcgroup")])
          return(result);
        }
      }
      
      if(length(psg_list) < 1){
        #no group satisfy conditions
        cat("No groups found, which satisfy your conditions!\n")
        result <- cbind(diffrep, peaklist[, c("isotopes","adduct","pcgroup")])
        return(result);
      }
    }
    
    #Include into psg_list all groups that has been created after groupCorr
    cnt <- length(xa@pspectra);
    xa <- groupCorr(
      xa,
      cor_eic_th=cor_eic_th, cor_exp_th=cor_exp_th, calcIso=calcIso, psg_list=psg_list,
      pval=pval, graphMethod=graphMethod, calcCiS=calcCiS, calcCaS=calcCaS, intval=intval
    )
    

    if(!is.null(psg_list)){
      psg_list <- c(psg_list,(cnt+1):length(xa@pspectra));
    }
    xa <- findAdducts(xa, multiplier=multiplier, ppm=ppm, mzabs=mzabs, 
                      polarity=polarity, rules=rules, psg_list=psg_list);
    
    xa.result <- getPeaklist(xa);
  }
  
  #combines results
  result <- cbind(diffrep, xa.result[, c("isotopes", "adduct", "pcgroup")])
  if(sortpval){
    #return diffreport order if p-value sorting is true
    result <- result[order(result[, "pvalue"]), ];
  }
  return(result);
} 

###End xsAnnotate generic Methods###

###xsAnnotate exported Methods###

#getpspectra
#Generate for one (or more) pseudospectrum a peaklist
#object - xsAnnotate
#grp - pseudospectrum ID
getpspectra <- function(object, grp=NULL){
  
  if(is.null(grp)) {
    cat("Error: No grp number!\n");
  } else if(!(is.numeric(grp)) | any(grp > length(object@pspectra))) {
    cat("Error: Grp is not numeric or contains number greater than maximum number of pseudospectra!\n");
  } else {
    index <- unlist(object@pspectra[grp]);
    #Check if index has peaks
    if(length(index) == 0){
      cat("Error: Pseudospectra selection contains no peaks.\n")
      return(NULL);
    }
    #get ps number for selected peaks  
    grpvec <- unlist(sapply(grp, function(x) { rep(x, length(object@pspectra[[x]])) }))
    
    #extract peaktable
    peaktable <- object@groupInfo[index,]

    adduct    <- vector("character",length(index));
    isotopes  <- vector("character",length(index));
    ions      <- object@derivativeIons;
    iso       <- object@isotopes;

    lions <- ions[index];
    liso  <- iso[index];

    #default polarity set to positive
    polarity <- "+";
    
    if(length(object@polarity) > 0){
      if(object@polarity == "negative"){
        polarity <- "-";
      }
    }
    
    for(i in 1:length(lions)){
      if(!is.null(lions[[i]])){
        if(length(lions[[i]]) > 1){
          names <- c();
          for(ii in 1:length(lions[[i]])){
            names <- paste(names,lions[[i]][[ii]]$name,lions[[i]][[ii]]$mass);
          }
          adduct[i] <- names;
        } else {
          adduct[i] <- paste(lions[[i]][[1]]$name,lions[[i]][[1]]$mass);
        }
      }
        
      if(!is.null(liso[[i]])){
        if(liso[[i]]$iso == 0){
          iso.name <- "[M]";
        }else{
          iso.name <- paste("[M+",liso[[i]]$iso,"]",sep="");
        }
        if(liso[[i]]$charge > 1){
          isotopes[i] <- paste("[",liso[[i]]$y,"] ",iso.name," ",liso[[i]]$charge,polarity,sep="");
        }else{
          isotopes[i] <- paste("[",liso[[i]]$y,"] ",iso.name," ",polarity,sep="");
        }
      }
    }

    #Check if peaktable has only one row
    if(is.null(nrow(peaktable))){ 
      peaktable <- matrix(peaktable, byrow=F, ncol=length(peaktable));
    }
    colnames(peaktable) <- colnames(object@groupInfo)
    
    return(invisible(data.frame(peaktable,isotopes,adduct,psg=grpvec,stringsAsFactors=FALSE)));
  }
}


setGeneric("getPeaklist", function(object, intval="into") standardGeneric("getPeaklist"))
setMethod("getPeaklist", "xsAnnotate", function(object, intval="into") {
  
  if (! intval %in% colnames(peaks(object@xcmsSet))) {
       stop("unknown intensity value!")
  }

  #generate peaktable
  #Check if xcmsSet contains only one sample
  if(object@sample[1] == 1 & length(sampnames(object@xcmsSet)) == 1){
    #intval is here ignored since all intensity values are already contained
    peaktable <- object@groupInfo;
  }else {
    #Case of xcmsSet with multiple samples
    #Use groupInfo information and replace intensity values
    peaktable <- object@groupInfo;
    
    #get intensity values from xcmsSet
    grpval <- groupval(object@xcmsSet, value=intval);
    
    #get column range for replacement
    grpval.ncol <- ncol(grpval)
    start <- ncol(peaktable) - grpval.ncol +1;
    ende  <- start + grpval.ncol - 1; 
    
    peaktable[, start:ende] <- grpval;
  }

  #allocate variables for CAMERA output
  adduct   <- vector("character", nrow(object@groupInfo));
  isotopes <- vector("character", nrow(object@groupInfo));
  pcgroup  <- vector("character", nrow(object@groupInfo));
   
  #default polarity set to positive
  polarity <- "+";
    
  if(length(object@polarity) > 0){
    if(object@polarity == "negative"){
      polarity <- "-";
    }
  }
  
  #First isotope informationen and adduct informationen
  for(i in seq(along = isotopes)){
    #check if adduct annotation is present for peak i
    if(length(object@derivativeIons) > 0 && !(is.null(object@derivativeIons[[i]]))) {
      #Check if we have more than one annotation for peak i
      if(length(object@derivativeIons[[i]]) > 1) {
        #combine ion species name and rounded mass hypophysis
        names <- paste(object@derivativeIons[[i]][[1]]$name, signif(object@derivativeIons[[i]][[1]]$mass, 6));
        for(ii in 2:length(object@derivativeIons[[i]])) {
          names <- paste(names, object@derivativeIons[[i]][[ii]]$name, signif(object@derivativeIons[[i]][[ii]]$mass, 6));
        }
        #save name in vector adduct
        adduct[i] <- names;
      } else {
        #Only one annotation
        adduct[i] <- paste(object@derivativeIons[[i]][[1]]$name, signif(object@derivativeIons[[i]][[1]]$mass, 6));
      }
    } else {
      #no annotation empty name
      adduct[i] <- ""; 
    }
    
    #Check if we have isotope informationen about peak i
    if(length(object@isotopes) > 0&& !is.null(object@isotopes[[i]])) {
      num.iso <- object@isotopes[[i]]$iso;
      #Which isotope peak is peak i?
      if(num.iso == 0){
        str.iso <- "[M]";
      } else { 
        str.iso <- paste("[M+", num.iso, "]", sep="")
      }
      #Multiple charged?
      if(object@isotopes[[i]]$charge > 1){
        isotopes[i] <- paste("[", object@isotopes[[i]]$y, "]", str.iso, object@isotopes[[i]]$charge, polarity, sep="");
      }else{
        isotopes[i] <- paste("[", object@isotopes[[i]]$y, "]", str.iso, polarity, sep="");
      }
    } else { 
      #No isotope informationen available
      isotopes[i] <- ""; 
    }
  }
  
  #Have we more than one pseudospectrum?
  if(length(object@pspectra) < 1){
      pcgroup <- 0;
  } else {
    for(i in seq(along = object@pspectra)){
      index <- object@pspectra[[i]];
      pcgroup[index] <- i;
    }
  }
          
  rownames(peaktable)<-NULL;#Bugfix for: In data.row.names(row.names, rowsi, i) :  some row.names duplicated:
  return(invisible(data.frame(peaktable, isotopes, adduct, pcgroup, stringsAsFactors=FALSE, row.names=NULL)));
})

setGeneric("getReducedPeaklist", function(object, method = "median", intval = "into", default.adduct.info = "first", mzrt.range = FALSE, npeaks.sum = FALSE, cleanup = FALSE) standardGeneric("getReducedPeaklist"))
setMethod("getReducedPeaklist", "xsAnnotate", function(object, method = "median", intval = "into", default.adduct.info = "first", mzrt.range = FALSE, npeaks.sum = FALSE, cleanup = FALSE) {
  # Get Peaklist from object
  peaklist <- getPeaklist(object=object, intval=intval)
  
  # Remove isotopes information
  peaklist <- peaklist[,-which(colnames(peaklist)=="isotopes")]
  
  # Matrix with samples intensities
  if (nrow(object@xcmsSet@phenoData) <= 1) {
    subset_samp <- c((which(colnames(peaklist)=="sample")) : (ncol(peaklist)-1-1))
  } else {
    subset_samp <- c((ncol(peaklist)-nrow(object@xcmsSet@phenoData)-1) : (ncol(peaklist)-1-1))
  }
  
  # Create reduced peak list
  peaklist_reduced <- by(data=peaklist, INDICES=peaklist$pcgroup, FUN=function(plist) {
    if (nrow(plist) > 1) {
      # Subset intensities
      pl.tab <- plist[, subset_samp, drop=FALSE]
      
      ## Select appropriate mz/rt information
      
      # Select default (first) adduct
      if (default.adduct.info == "first") {
        pl.max <- 1
      }
      
      # Select adduct that has maximum intensities
      if (default.adduct.info == "maxint") {
        pl.max <- apply(X=pl.tab, MARGIN=1, FUN=function(x) { sum(x, na.rm=TRUE) } )
        pl.max <- which(pl.max==max(pl.max))[1]
      }
      
      # Select adduct that has most peaks
      if (default.adduct.info == "maxpeaks") {
        pl.max <- which(plist$npeaks==max(plist$npeaks))[1]
      }
      
      # Take lowest/highest values of mzmin, mzmax & rtmin, rtmax
      if (mzrt.range == TRUE) {
        plist[pl.max, "mzmin"] <- min(plist$mzmin)
        plist[pl.max, "mzmax"] <- max(plist$mzmax)
        plist[pl.max, "rtmin"] <- min(plist$rtmin)
        plist[pl.max, "rtmax"] <- max(plist$rtmax)
      }
      
      # Sum up all peaks found
      if (npeaks.sum == TRUE) {
        plist[pl.max, "npeaks"] <- sum(plist$npeaks)
      }
      
      ## Selection methods
      
      # Sum intensities of all adducts per sample
      if (method == "sum") {
        pl.met <- apply(X=pl.tab, MARGIN=2, FUN=function(x) { sum(x, na.rm=TRUE) } )
      }
        
      # Median intensities of all adducts per sample
      if (method == "median") {
        pl.met <- apply(X=pl.tab, MARGIN=2, FUN=function(x) { median(x, na.rm=TRUE) } )
      }
          
      # Only take the adduct line where intensities are highest
      if (method == "maxint") {
        pl.met <- pl.max
      }
      
      # PCA of all adducts per samples
      if (method == "pca") {
        pl.tab[is.na(pl.tab)] <- 0
        pl.met <- prcomp(x=pl.tab)
        pl.met <- pl.met$rotation[,1]
        pl.met[pl.met==0] <- NA
        pl.met <- pl.met + (abs(min(pl.met, na.rm=T)) * 2)
      }
      
      
      # Determine putative neutral adduct mass
      plist[pl.max, "adduct"] <- na.omit(as.numeric(unique(gsub('.* ', '', plist$adduct))))[1]
      
      # Return line with max intensity
      plist[pl.max, subset_samp] <- pl.met
      plist <- plist[pl.max,]
    }
    
    # Otherwise using default putative neutral mass of adduct
    if ( (is.na(plist$adduct)) || (plist$adduct == "") ) plist$adduct <- as.numeric(plist$mz)
    
    # Return object
    plist
  } )
  
  # Fix type
  peaklist_reduced <- lapply(X=peaklist_reduced, FUN=function(x) {
    x <- as.numeric(x)
    x
  } )
  
  # Unlist
  peaklist_reduced <- data.frame(matrix(as.numeric(unlist(peaklist_reduced)), nrow=length(peaklist_reduced), byrow=TRUE))
  colnames(peaklist_reduced) <- colnames(peaklist)
  
  # Sort
  peaklist_reduced <- peaklist_reduced[order(peaklist_reduced$pcgroup, decreasing=FALSE),]
  rownames(peaklist_reduced) <- peaklist_reduced$pcgroup
  
  # Cleanup values in peak list
  if (cleanup == TRUE) {
    # Remove NAs & negative abundances in features
    pl.tmp <- peaklist_reduced[, subset_samp]
    pl.tmp[is.na(pl.tmp)] <- 0
    pl.tmp[pl.tmp < 0] <- 0
    
    # Remove rows with only zeros
    rm_cc <- c()
    for (i in 1:nrow(pl.tmp)) {
      if (sum(pl.tmp[i,]) == 0) rm_cc <- c(rm_cc, i)
    }
    if (length(rm_cc) > 0) pl.tmp <- pl.tmp[-c(rm_cc),]
    
    # Remove constant features (rows)
    #http://stackoverflow.com/questions/15068981/removal-of-constant-columns-in-r
    pl.tmp <- pl.tmp[!apply(pl.tmp, MARGIN=1, function(x) max(x,na.rm=TRUE) == min(x,na.rm=TRUE)),]
    
    # Write back changes
    rm_row <- which(!(rownames(peaklist_reduced) %in% rownames(pl.tmp)))
    if (length(rm_row) > 0) peaklist_reduced <- peaklist_reduced[-c(rm_row),]
    
    peaklist_reduced[, subset_samp] <- pl.tmp
  }
  
  # Return data frame
  return(invisible(as.data.frame(peaklist_reduced)))
})

setGeneric("annotate", function(object, sample=NA, nSlaves=1, sigma=6, perfwhm=0.6, cor_eic_th=0.75, graphMethod="hcs",
  pval=0.05, calcCiS=TRUE, calcIso=FALSE, calcCaS=FALSE, maxcharge=3, maxiso=4, minfrac=0.5, ppm=5, mzabs=0.015, 
  quick=FALSE, psg_list=NULL,  rules=NULL, polarity="positive", multiplier=3, max_peaks=100 ,intval="into") standardGeneric("annotate"))

setMethod("annotate", "xcmsSet", function(object, sample=NA, nSlaves=1, sigma=6, perfwhm=0.6, cor_eic_th=0.75, graphMethod="hcs",
  pval=0.05, calcCiS=TRUE, calcIso=FALSE, calcCaS=FALSE, maxcharge=3, maxiso=4, minfrac=0.5, ppm=5, mzabs=0.015, 
  quick=FALSE, psg_list=NULL,  rules=NULL, polarity="positive", multiplier=3, max_peaks=100 ,intval="into") {

  #check intval
  if (!sum(intval == c("into","intb","maxo"))){
       stop("unknown intensity value!\n")
  }

  #check graphMethod
  if(!sum(graphMethod == c("hcs","lpc"))){
      stop("Unknown graphMethod value!\n")
  }

  if(quick){
    #Quick run, no groupCorr and findAdducts
    xa <- xsAnnotate(object, sample=sample, nSlaves=nSlaves);
    xa <- groupFWHM(xa, perfwhm=perfwhm, sigma=sigma, intval=intval);
    xa <- findIsotopes(xa, maxcharge=maxcharge, maxiso=maxiso, ppm=ppm, mzabs=mzabs, intval=intval, minfrac=minfrac)

  }else{

    xa  <- xsAnnotate(object, sample=sample, nSlaves=nSlaves);
    xa  <- groupFWHM(xa, perfwhm=perfwhm, sigma=sigma, intval=intval);
    xa  <- findIsotopes(xa, maxcharge=maxcharge, maxiso=maxiso, ppm=ppm, mzabs=mzabs, intval=intval, minfrac=minfrac)
    cnt <- length(xa@pspectra);
    xa  <- groupCorr(xa, cor_eic_th=cor_eic_th, graphMethod=graphMethod, calcIso=calcIso, calcCiS=calcCiS, calcCaS=calcCaS, psg_list=psg_list)
    if(!is.null(psg_list)){
      psg_list <- c(psg_list,(cnt+1):length(xa@pspectra));
    }
    xa <- findAdducts(xa, multiplier=multiplier, ppm=ppm, rules=rules, max_peaks=max_peaks, mzabs=mzabs, polarity=polarity, psg_list=psg_list);
  }
  #Kombiniere Resultate

  return(xa);
})

##Screen for mass differences
findKendrickMasses <- function(object, masses=c(14, 14.01565), maxHomologue=4, 
                               error=0.002, time=60, intval="maxo",
                               plot=FALSE) {

########Variables########
#Mass Difference in Da
nominalMass <- masses[1]
exactMass   <- masses[2]

######End Variables######

#Read peaktable

if(is.na(object@sample[1]) || length(object@xcmsSet@filepaths) > 1) {
  if(is.na(object@sample[1])){
    index <- 1:length(object@xcmsSet@filepaths);
  }else{
    index <- object@sample;
  }
  maxo   <- groupval(object@xcmsSet,value=intval)[, index, drop=FALSE];
}else{
  maxo    <- object@groupInfo[, intval]
}

#extract data (m/z, rt and intensities)
data <- cbind(object@groupInfo[, c("mz","rt")], maxo)

#check na values
if(any(sapply(data[, 1], is.na))){
  stop("No NA values allowed!")
}

ord <- order(data[, 1])
#sorting after mass(first column)
data.sorted <- data[ord, ]

#calculate kendrick masses
kendrickMass <- data.sorted[, 1] * nominalMass/exactMass;
kendrickMassDefect <- ceiling(kendrickMass) - kendrickMass
kendrickMass <- ceiling(kendrickMass)

#generate index vector
allCandidates <- 1:length(kendrickMassDefect);

#generate result list;
results <- list();

# while loop
while(length(allCandidates) > 0){
  #calculate m/z difference
  difference.mz <- kendrickMass[allCandidates] - kendrickMass[allCandidates[1]]
  #calculate rt difference
  difference.rt <- data.sorted[allCandidates, 2] - data.sorted[allCandidates[1], 2]
  #calculate remains from division
  remains <- difference.mz %% nominalMass
  #calculate quotient from division
  quotient <- difference.mz %/% nominalMass
  
  #find candidates with remains = 0 and m/z difference in allowed range
  index <- which(remains == 0 & difference.mz <= maxHomologue * nominalMass
                 & difference.mz > 0
                 & abs(difference.rt) <= abs(quotient * time)
                 & sign(time) * difference.rt > 0
                 )
  
  #do we have candidates, otherwise next
  if(length(index) < 1){
    allCandidates <- allCandidates[-1, drop=FALSE]
    next;
  }
  
  index.Kendrick <- CAMERA:::fastMatch(kendrickMassDefect[allCandidates[index]],
                                       kendrickMassDefect[allCandidates[1]], 
                                       tol=error)
  if(any(hit <- !sapply(index.Kendrick, is.null))){
    hits <- c(1, index[which(hit)])  
    results[[length(results)+1]] <- allCandidates[hits];
  }
  allCandidates <- allCandidates[-c(1, index[which(hit)]), drop=FALSE]
}

if(is.matrix(maxo)){
  addCol <- ncol(maxo)
}else{
  addCol <- 1
}
resultMatrix <- matrix(NA, nrow=0, ncol=6+addCol)

#generate Results
index <- 1
invisible(lapply(results, function(x){
  resultMatrix <<- rbind(resultMatrix, cbind(index, ord[x],kendrickMass[x],
                                             kendrickMassDefect[x], data.sorted[x, ] ))
  index <<- index + 1
}))

if(plot){
  #plot m/z - rt plot
  plot(data.sorted[, 2], data.sorted[, 1], xlab="retention time [s]", 
       ylab="m/z",main=paste("Kendrick Plot\nmz: nominal",masses[1]," exact",
                         masses[2]))
  color <- rainbow(length(results))
  index <- 0;
  lapply(results, function(x){
    index <<- index +1;
    points( data.sorted[x,2], data.sorted[x, 1], col=color[index],pch=20)
    lines(data.sorted[x,2],data.sorted[x, 1],col=color[index])
  })  
}

colnames(resultMatrix)[1:6] <- c("Index","GIndex","KMass","KMassDefect","mz","rt")
  return(invisible(resultMatrix))
}

#Find NeutralLossSpecs
#Test every pseudospectrum in a xsAnnotate object if the peaks match
#a specific mz difference. Returns a boolean vector, where a hit is marked
#with TRUE.
#object - xsAnnotate object
#mzdiff - mz difference of interest
#mzabs  - allowed absolute error in mz
#mppam  - allowed relative error in ppm (not used)

findNeutralLossSpecs <- function(object, mzdiff=NULL, mzabs=0, mzppm=10) {

  if (!class(object) == "xsAnnotate"){ 
    stop ("Parameter object is no xsAnnotate object\n")
  }
  
  if(is.null(mzdiff)){
    stop ("Parameter mzdiff must be set\n")
  }
  
  if(!is.numeric(mzdiff) || any(mzdiff <= 0)) {
    stop ("Parameter mzdiff must be a positive numeric value\n")
  }
  
  if(!is.numeric(mzabs) || mzabs < 0) {
    stop ("Parameter mzabs must be a positive numeric value\n")
  }
  
  if(!is.numeric(mzppm) || mzppm < 0) {
    stop ("Parameter mzppm must be a positive numeric value\n")
  }
  
  # get mz values from peaklist
  imz    <- object@groupInfo[, "mz"];
  
  #get Isotopes
  isotopes  <- object@isotopes;
  #Remove recognized isotopes from annotation m/z vector
  for(x in seq(along = isotopes)){
    if(!is.null(isotopes[[x]])){
      if(isotopes[[x]]$iso != 0){
        imz[x] <- NA;
      }
    }
  }
  
  #Calculate mzrange
  nlmin <- mzdiff-mzabs
  nlmax <- mzdiff+mzabs

  
  hits <- sapply(1:length(object@pspectra), function(j) {
    spec  <-  object@pspectra[[j]]

    mz  <-  CAMERA:::naOmit(imz[spec]) #mz vector
    nl  <-  as.matrix(dist(mz, method="manhattan")) #distance matrix
    hit <- FALSE;
    for(x in seq_along(mz)){
      mzadd <- mz[x]*mzppm*10^-6
      for(y in seq_along(nlmin)){
        if(any(which(nl[x,] > (nlmin[y]-mzadd) & nl[x,] < (nlmax[y] + mzadd)))){
          hit <- TRUE;
          break;
        }
      }
    }
    hit
  })
  return(hits)
}

#Find NeutralLosses
#Test every pseudospectrum in a xsAnnotate object if two peaks match
#a specific mz difference. Returns a artifical xcmsSet
#object - xsAnnotate object
#mzdiff - mz difference of interest
#mzabs  - allowed absolute error in mz
#mppam  - allowed relative error in ppm (not used)
findNeutralLoss <- function(object, mzdiff=NULL, mzabs=0, mzppm=10) {

  if (!class(object) == "xsAnnotate"){ 
    stop ("Parameter object is no xsAnnotate object\n")
  }
  
  if(is.null(mzdiff)){
    stop ("Parameter mzdiff must be set\n")
  }
  
  if(!is.numeric(mzdiff) || any(mzdiff <= 0)) {
    stop ("Parameter mzdiff must be a positive numeric value\n")
  }
  
  if(!is.numeric(mzabs) || mzabs < 0) {
    stop ("Parameter mzabs must be a positive numeric value\n")
  }
  
  if(!is.numeric(mzppm) || mzppm < 0) {
    stop ("Parameter mzppm must be a positive numeric value\n")
  }
  
  #Calculate mzrange
  nlmin <- mzdiff-mzabs
  nlmax <- mzdiff+mzabs

  #Create tempory xcmsSet object to store the peak matches
  xs <- new("xcmsSet");

  peaks <- matrix(ncol=ncol(object@groupInfo)+1, nrow=0);
 
  sampnames(xs) <- c("NeutralLoss")
  sampclass(xs) <- c("NeutralLoss")

  # get mz values from peaklist
  imz    <- object@groupInfo[, "mz"];
  
  #get Isotopes
  isotopes  <- object@isotopes;
  #Remove recognized isotopes from annotation m/z vector
  for(x in seq(along = isotopes)){
    if(!is.null(isotopes[[x]])){
      if(isotopes[[x]]$iso != 0){
        imz[x] <- NA;
      }
    }
  }
  
  #Loop over all pseudospectra
  for(j in seq(along = object@pspectra)) {
    
    #get specific pseudospectrum j
    spec  <-  object@pspectra[[j]]
    #lastcol <- ncol(spec);
    
    #get mz values
    mz  <-  imz[spec] 
    
    #generate distance matrix
    nl  <-  as.matrix(dist(mz, method="manhattan"))
    
    hits <- c();
    for(x in seq_along(mz)){
      mzadd <- mz[x]*mzppm*10^-6
      if(is.na(mzadd)){
        next;
      }
      for(y in seq_along(nlmin)){
        if(length( index <- which(nl[x,] > (nlmin[y]-mzadd) & 
          nl[x,] < (nlmax[y] + mzadd))) > 0 ){
          #found hit
          hits <- rbind(hits,cbind(x,index))
        }
      }
    }
    if(is.null(hits)){
      next;
    }
    ## Remove hit from lower diagonal matrix
    index <- which(apply(hits,1, function(x) x[1] > x[2]))
    if(length(index) > 0){
      hits <- hits[-index,,drop=FALSE]
    }
    # Remove duplicated
    hits <- unique(hits)
    # loop over all hits
    for (f in seq_len(nrow(hits))) {
      # hit f
      hit <- hits[f,]
        
      

      #For each peak-pair save the greater peak(higher mz value) to peak table
      if (mz[hit[1]] > mz[hit[2]]) {
        newpeak <- object@groupInfo[spec[hit[1]],, drop=FALSE]
      } else {
        newpeak <- object@groupInfo[spec[hit[2]],, drop=FALSE]
      }  
      peaks <- rbind(peaks, cbind(newpeak,j))
    }
  }
  colnames(peaks) <- c(colnames(object@groupInfo),"compound spectrum");
  #Store peaks into the xcmsSet
  peaks(xs) <- as.matrix(peaks)
  invisible(xs)
}

msgfun.snowParallel <- function(x,i){
  cat("Sending task # ",i,"\n");
  flush.console();
}

cleanParallel <- function(object){
  ##testing objects
  if (!class(object) == "xsAnnotate"){
    stop ("xsa.pos is no xsAnnotate object")
  }
  
  if(object@runParallel$enable == 1){
    if(object@runParallel$mode == "Rmpi"){
      mpi.close.Rslaves()
    } else if(object@runParallel$mode == "snow" & !(is.null(object@runParallel$cluster))){
      stopCluster(object@runParallel$cluster);
    }
    cat("Slaves were stopped!\n");
  }
}
  
###End xsAnnotate exported Methods###


###xsAnnotate intern Function###

combinexsAnnos <- function(xsa.pos, xsa.neg, pos=TRUE, tol=2, ruleset=NULL){
  # two xsAnnotate objects (pos,neg)
  # pos: returns pos. (true) or neg. (false) peaklist;  default = TRUE
  # tol: max allowed time difference between pos and neg pseudospectra
  
  ##testing objects
  if (!class(xsa.pos) == "xsAnnotate"){
    stop ("xsa.pos is no xsAnnotate object")
  }
  if (!class(xsa.neg) == "xsAnnotate"){ 
    stop ("xsa.neg is no xsAnnotate object")
  }
  if (xsa.pos@polarity != "positive" & xsa.neg@polarity != "negative"){
    stop ("xsAnnotate object have unknown polarities.\nOnly positive or negative are allowed!")
  }
  
  ##1. Step
  #get all rts for every pseudospectra
  rt1 <- sapply(xsa.pos@pspectra, function(x) { 
          mean(xsa.pos@groupInfo[x, "rt"]) 
    })
  
  rt2 <- sapply(xsa.neg@pspectra, function(x) { 
          mean(xsa.neg@groupInfo[x, "rt"]) 
    })
  
  #find matching pseudospectra
  ps.match <- CAMERA:::fastMatch(rt1, rt2, tol=tol);
  rules.pos <- xsa.pos@ruleset;
  rules.neg <- xsa.neg@ruleset;
  
  #ruleset
  if(is.null(ruleset)){
    #generate M+H,M-H rule
    if(pos){
      name <- "M+H/M-H";
    }else{
      name <- "M-H/M+H";
    }
    ruleset <- data.frame(name, 1, 1, 1, 1, 2.014552, 1, 1); 
    colnames(ruleset) <- c("name", "nmol.pos", "nmol.neg", "charge.pos", "charge.neg", 
                           "massdiff", "ID.pos", "ID.neg");
  }else{
    #generate rules stated in ruleset
    if(!is.matrix(ruleset) || ncol(ruleset) != 2){
      stop("Ruleset is no matrix or number of cols is unequal two");
    }else{
      #check provided ruleset
      name <- c(); 
      nmol.pos <- c(); 
      nmol.neg <- c();
      charge.pos <- c(); 
      charge.neg <- c();
      massdiff   <- c();
         apply(ruleset,1, function(x) {
            name <<- c(name,paste(rules.pos[x[1],"name"],rules.neg[x[2],"name"],sep="/"));
            nmol.pos <<- c(nmol.pos,rules.pos[x[1],"nmol"]);
            nmol.neg <<- c(nmol.neg,rules.neg[x[2],"nmol"]);  
            charge.pos <<- c(charge.pos,rules.pos[x[1],"charge"]);
            charge.neg <<- c(charge.neg,rules.neg[x[2],"charge"]);
            massdiff <<- c(massdiff, rules.pos[x[1],"massdiff"] - rules.neg[x[2],"massdiff"]);
            return();
        })
      #check charges
      if(! all(charge.pos == abs(charge.neg))){
        stop("Ruleset can't combine rules with unequal charges")
      }
      ruleset <- data.frame(name,nmol.pos,nmol.neg,charge.pos,charge.neg,massdiff,ruleset);
      colnames(ruleset) <- c("name", "nmol.pos", "nmol.neg","charge.pos","charge.neg", "massdiff", "ID.pos","ID.neg");
    }
  }

  #allocate variable for matching results 
  endresult <- matrix(ncol=6, nrow=0);
  colnames(endresult) <- c("grp.pos", "peak.pos", "grp.neg", "peak.neg", "mass", "rule")
   
  #Annotations from positive Samples
  annoID.pos  <- xsa.pos@annoID; 
  annoGrp.pos <- xsa.pos@annoGrp;
  #Annotations from negative Samples
  annoID.neg  <- xsa.neg@annoID;
  annoGrp.neg <- xsa.neg@annoGrp;
  
  #Groups which can be deleted
  grp2del  <- c();
  grp2save <- c();
  peaklist <- c(); #set global variable

  cat ("Run combining of xsAnnotates\n % finished: ");
  lp <- -1;
  ncl<-sum(sapply(ps.match,length));
  npeaks.global <- 0;
  
  #for every pseudospectra
  for(i in seq(along=ps.match)){
    percentOutput(npeaks.global, length(ps.match[[i]]), ncl, lp)
    #check for match
    if(is.null(ps.match[[i]])){
      #no corresponding match in neg sample
      next;
    }

    #matrix stores matches
    result.matrix <- matrix(ncol=4, nrow=0);
    colnames(result.matrix) <- c("pos.peak.ID", "neg.peak.ID", "j", "rule");
    
    #get all m/z values from pos pseudospectrum
    grp.pos <- getpspectra(xsa.pos, i);
    m.pos <- grp.pos[, "mz"]

    #remove m/z values of annotated isotope peaks
    for(k in 1:nrow(grp.pos)){
      if(!is.null(xsa.pos@isotopes[[xsa.pos@pspectra[[i]][k]]])){
        if(xsa.pos@isotopes[[xsa.pos@pspectra[[i]][k]]]$iso > 0){
          m.pos[k] <- NA;
        }
      }
    }
    
    na.ini.pos <- which(!is.na(m.pos)); #index of non NA values
    
    if(length(na.ini.pos) < 1){
      next;
    }
    #get all m/z hypotheses (if exists)
    if(length(index <- which(annoGrp.pos[, 4] == i)) > 0){
       masslist.pos <- annoGrp.pos[index, 2];
    }else{
       #no annotation exists
       masslist.pos <- NULL;
    }
    
    #for every matching neg pseudospectra
    for(j in seq(along=ps.match[[i]])){
      #get all m/z values from neg pseudospectrum
      grp.neg <- getpspectra(xsa.neg, ps.match[[i]][j]);
      m.neg   <- grp.neg[,"mz"]
      #remove masses of annotated isotope peaks
      for(k in 1:nrow(grp.neg)){
        if(!is.null(xsa.neg@isotopes[[xsa.neg@pspectra[[ps.match[[i]][j]]][k]]])){
          if(xsa.neg@isotopes[[xsa.neg@pspectra[[ps.match[[i]][j]]][k]]]$iso > 0){
            m.neg[k] <- NA;
            next;
          }
        }
      }

      na.ini.neg <- which(!is.na(m.neg));#index of non NA values
      if(length(na.ini.neg) < 1){
        next;
      }
      #get all m/z hypotheses (if exists)
      if(length(index <- which(annoGrp.pos[, 4] == ps.match[[i]][j])) > 0){
        masslist.neg <- annoGrp.pos[index, 2];
      }else{
        #no annotation exists
        masslist.neg <- NULL;
      }

      #match rules against m/z values
      results <- matrix(NA, nrow=length(m.pos), ncol=nrow(ruleset));
      results[na.ini.pos, ] <- CAMERA:::combineHypothese(naOmit(m.pos), naOmit(m.neg), 
                                                ruleset, na.ini.pos, na.ini.neg);
      
      if(!all(is.na(results))){
        #Found Hits between pos and neg datasets with ruleset
        for(l in 1:ncol(results)){
          index <- which(!is.na(results[, l]));
          if(length(index) > 0){
            #peak index of matching peaks
            #first column pos, second neg
            tmp <- matrix(c(xsa.pos@pspectra[[i]][index], 
                            xsa.neg@pspectra[[ps.match[[i]][j]]][results[index, l]]),
                          ncol=2);
            result.matrix <- rbind(result.matrix, cbind(tmp,ps.match[[i]][j],l));
          }
        }
      }
    }
    
    if(! nrow(result.matrix) > 0){
      next; #Found no hit
    }

    for(ii in 1:nrow(result.matrix)){
      if(length(index <- which(annoID.pos[, "id"] == result.matrix[ii, 1] )) > 0){
        #Peak has annotation(s)
        if(all(annoID.pos[index, "ruleID"] == ruleset[result.matrix[ii,4], "ID.pos"])){
          #Peak has only one annotation and could be verified
          mass <- 2;
          endresult <- rbind(endresult, c(i, result.matrix[ii, 1], result.matrix[ii, 3],
                                          result.matrix[ii, 2], mass, result.matrix[ii, 4]));
          grp2save  <- c(grp2save, annoID.pos[index, "grpID"]);
        }else{
          #Peak has more than one annotation or verfication goes wrong
          grp.save <- which(annoID.pos[index, "ruleID"] == ruleset[result.matrix[ii, 4], "ID.pos"]);
          if(length(grp.save) > 0 ){
            #Save verified annotation and remove from index
            grp2save  <- c(grp2save, annoID.pos[index[grp.save], "grpID"]);
            index <- index[-grp2save];
            mass <- 2;
            endresult <- rbind(endresult, c(i, result.matrix[ii, 1], result.matrix[ii, 3],
                                            result.matrix[ii, 2], mass,result.matrix[ii, 4]));  
          }else{
            #Found new annotation
            mass <- 1;
            endresult <- rbind(endresult, c(i, result.matrix[ii, 1], result.matrix[ii, 3],
                                            result.matrix[ii, 2], mass, result.matrix[ii, 4]));
          }  
          #delete all other hypotheses
          grp2del  <- c(grp2del, annoID.pos[index, "grpID"]);
        }
      }else{
        #Peak has no annotation
        #Add annotation according to ruleset
        mass <- 1;
        endresult <- rbind(endresult, c(i, result.matrix[ii, 1], result.matrix[ii, 3],
                                        result.matrix[ii, 2], mass, result.matrix[ii, 4]));
      }
    }
  }#end for i loop

  #Remove grp2del groups, if they are in grp2save
  grp2del <- unique(grp2del);
  grp2save <- unique(grp2save);
  if(length(grp2del) > 0 & length(grp2save) > 0){
    index <- which(grp2del %in% grp2save);
    if(length(index) > 0){
      grp2del <- grp2del[-index];
    }
  }
  cat("\nGenerate Peaklist....\n");
  if(pos){
    #return postive peaklist
    if(length(grp2del) > 0){
      index <- which(annoID.pos[,2] %in% grp2del);
      #annotation to delete
      if(length(index) > 0){
        annoID.pos <- annoID.pos[-index,];
      }
    }

    #new vector to peaklist
    add.adducts <- vector("character", nrow(xsa.pos@groupInfo));
    old.grpid   <- max(annoGrp.pos[, 1]); #counter for grp-ID

    if(nrow(endresult) > 0){
      for(i in 1:nrow(endresult)){
        if(endresult[i, 5] == 1){
          old.grpid <- old.grpid + 1;
          rule.ID <- ruleset[endresult[i,6], "ID.pos"];
          annoID.pos  <- rbind(annoID.pos, c(endresult[i, "peak.pos"], old.grpid, rule.ID, NA))
          mass <- (xsa.pos@groupInfo[endresult[i,"peak.pos"],"mz"]*rules.pos[rule.ID,"charge"] - rules.pos[rule.ID,"massdiff"]) / rules.pos[rule.ID,"nmol"];
          annoGrp.pos <- rbind(annoGrp.pos, c(old.grpid,mass,2,endresult[i,1]))
        }
        add.adducts[endresult[i,"peak.pos"]]<-paste("Found",ruleset[endresult[i,6],1]);
      }
    }
    #save results
    xsa.pos@derivativeIons <- getderivativeIons(annoID.pos, annoGrp.pos, rules.pos, 
                                                length(xsa.pos@isotopes));
    peaklist <- getPeaklist(xsa.pos);
    index    <- ncol(peaklist);
    peaklist<-cbind(peaklist,add.adducts);
    colnames(peaklist)[index+1] <- "neg. Mode"
#     return(peaklist);

  }else{
    #return negative peaklist
    if(length(grp2del) > 0){
      index <- which(annoID.neg[,2] %in% grp2del);
      #annotation to delete
      if(length(index) > 0){
        annoID.neg <- annoID.neg[-index,];
      }
    }

    #new vector to peaklist
    add.adducts <- vector("character", nrow(xsa.neg@groupInfo));
    old.grpid   <- max(annoGrp.neg[, 1]);

    if(nrow(endresult) > 0){
      for(i in 1:nrow(endresult)){
        if(endresult[i, 5] == 1){
          old.grpid <- old.grpid + 1;
          rule.ID <- ruleset[endresult[i,6],"ID.neg"];
          annoID.neg  <- rbind(annoID.neg,  c(endresult[i,"peak.neg"],old.grpid,rule.ID,NA))
          mass <- (xsa.neg@groupInfo[endresult[i,"peak.neg"],"mz"]* abs(rules.neg[rule.ID,"charge"]) - rules.neg[rule.ID,"massdiff"]) / rules.neg[rule.ID,"nmol"];
          annoGrp.neg <- rbind(annoGrp.neg, c(old.grpid,mass,2,endresult[i,1]))
        }
        add.adducts[endresult[i,"peak.neg"]]<-paste("Found",ruleset[endresult[i,6],1]);
      }
    }

    xsa.neg@derivativeIons <- getderivativeIons(annoID.neg,annoGrp.neg,rules.neg,length(xsa.neg@isotopes)); #save results
    peaklist<-getPeaklist(xsa.neg);
    index<-ncol(peaklist)
    peaklist<-cbind(peaklist,add.adducts);
    colnames(peaklist)[index+1] <- "pos. Mode"
  }

  return(peaklist);
}    


combineHypothese <- function(mass.pos, mass.neg, ruleset, ini1, ini2, tol=0.02){
    ##generiere neue Peaklist
    tmp.ruleset <- ruleset[,c("name","nmol.pos","charge.pos","massdiff")];
    colnames(tmp.ruleset) <- c("name","nmol","charge","massdiff")
    ML <- massDiffMatrix(mass.pos,tmp.ruleset)
    ML.match <- apply(ML,2,function(x) {
      m <- fastMatch(x,mass.neg,tol=tol);
      unlist(lapply(m, function(x) { if(is.null(x)){return(NA)}else{return(ini2[x[1]])} })); ##Select first hit, if more than one??
    });
    return(ML.match);
}

##Speed-up function for ignoring NA values
naOmit <- function(x) {
  return (x[!is.na(x)]);
}

# Create complete feature table
# xs - xcmsSet object
# method - groupval parameter method
# value - groupval parameter method
getPeaks_selection <- function(xs, method="medret", value="into"){
  
  if (!class(xs) == "xcmsSet") {
    stop ("Parameter xs is no xcmsSet object\n")
  }
  
  # Testing if xcmsSet is grouped
  if (nrow(xs@groups) > 0 && length(xs@filepaths) > 1) {
    # get grouping information
     groupmat <- groups(xs)
     # generate data.frame for peaktable
     ts <- data.frame(cbind(groupmat, groupval(xs, method=method, value=value)), row.names = NULL)
     #rename column names
     cnames <- colnames(ts)
     if (cnames[1] == 'mzmed') {
       cnames[1] <- 'mz' 
     } else { 
       stop ('Peak information ?!?')
     }
     if (cnames[4] == 'rtmed') {
       cnames[4] <- 'rt' 
     } else {
       stop ('Peak information ?!?')
     }
     colnames(ts) <- cnames
  } else if (length(sampnames(xs)) == 1) { #Contains only one sample?
    ts <- xs@peaks
  } else {
    stop ('First argument must be a xcmsSet with group information or contain only one sample.')
  }
  
  return(as.matrix(ts))
}


# Returns peak table of one! sample from a xcmsSet
# Does not return the feature table (result from group)
# xs - xcmsSet object
# index - sample index
# index = -1 return complete peak table overall sample
getPeaks <- function(xs, index=1){
 
  if (!class(xs) == "xcmsSet") {
    stop ("Parameter xs is no xcmsSet object\n")
  }
  if (!is.numeric(index) && index > length(sampnames(xs)) | index < -1 | index == 0) {
    stop("Parameter index must be between 1 and number of samples or -1\n")
  }
  
  #Testing if xcmsSet is grouped
  if (nrow(xs@groups) > 0) {
    #Should all peaks returned
    if(index == -1) {
      ts <- xs@peaks;
    } else {
      #get peak indices for sample index
      peaki <- getPeaksIdxCol(xs,NULL)[,index]
      #extract peaks from sample index
      ts <- xs@peaks[peaki,]
    }
  } else if (length(sampnames(xs)) == 1) {# xs not grouped, testing if it only contains one sample
        ts <- xs@peaks
  } else {
    stop ('First argument must be a xcmsSet with group information or contain only one sample.')
  }
  
  return(as.matrix(ts))
}


is.wholenumber <- function(x, tol = .Machine$double.eps^0.5)  abs(x - round(x)) < tol

#function write percent Output
percentOutput <- function(len.old, len.add, len.max, cnt){
  len <- len.old + len.add;
  perc <- round((len) / len.max * 100)
  perc <- perc %/% 10 * 10;
  if (perc != cnt && perc != 0) { 
    cat(perc,' '); 
    cnt2 <- perc;
    eval.parent(substitute(cnt <- cnt2))
  }
  if (.Platform$OS.type == "windows"){ 
    flush.console();
  }
  eval.parent(substitute(len.old <- len))
}
###End xsAnnotate intern Function###
sneumann/CAMERA documentation built on April 5, 2024, 2:33 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sneumann/CAMERA
Collection of annotation related methods for mass spectrometry data

R/xsAnnotate.R
In sneumann/CAMERA: Collection of annotation related methods for mass spectrometry data

Defines functions percentOutput is.wholenumber getPeaks getPeaks_selection naOmit combineHypothese combinexsAnnos cleanParallel msgfun.snowParallel findNeutralLoss findNeutralLossSpecs findKendrickMasses getpspectra annotateDiffreport xsAnnotate

Documented in annotateDiffreport cleanParallel combinexsAnnos findKendrickMasses findNeutralLoss findNeutralLossSpecs getpspectra xsAnnotate

R Package Documentation

Browse R Packages

We want your feedback!

sneumann/CAMERA Collection of annotation related methods for mass spectrometry data

R/xsAnnotate.R In sneumann/CAMERA: Collection of annotation related methods for mass spectrometry data

Defines functions percentOutput is.wholenumber getPeaks getPeaks_selection naOmit combineHypothese combinexsAnnos cleanParallel msgfun.snowParallel findNeutralLoss findNeutralLossSpecs findKendrickMasses getpspectra annotateDiffreport xsAnnotate

Documented in annotateDiffreport cleanParallel combinexsAnnos findKendrickMasses findNeutralLoss findNeutralLossSpecs getpspectra xsAnnotate

R Package Documentation

Browse R Packages

We want your feedback!

sneumann/CAMERA
Collection of annotation related methods for mass spectrometry data

R/xsAnnotate.R
In sneumann/CAMERA: Collection of annotation related methods for mass spectrometry data