R/DataProcessing.R

Defines functions colorVectorToStringForEval numericVectorToStringForEval regExExtraction getMS2plotData getSpectrumStatistics getPrecursorSetFromTreeSelection getPrecursorSetFromTreeSelections getTableFromPrecursorSet getMS2spectrumInfoForCluster getMS2spectrumInfoForPrecursor getMS2spectrumInfoForPrecursorLeaf getMS2spectrum getMetFragLink getPrecursorColors setOfAnnotationSetsToSetOfColorSets precursorSetToSetOfAnnotationSets castListEntries deserializeParameterSetKeyValuePairs deserializeParameterSet serializeParameterSet deserializeParameterSetFile serializeParameterSetFile deserializeSampleSelectionAndOrder serializeSampleSelectionAndOrder processMS1data readProjectData readClusterDataFromProjectFile sparseMatrixToString data.numericmatrix

Documented in castListEntries data.numericmatrix processMS1data readClusterDataFromProjectFile readProjectData

#' Convert data.frame columns to numeric
#'
#' The data.numericmatrix() function works similar to base::data.matrix()
#' before R-4.0.0 converting character columns to numeric without converting 
#' to factor first, thus returning the actual numeric values.
#' 
#' @param x The data.frame to convert
#'
#' @return A matrix with all columns converted to numeric
#' @export
#'
#' @examples
#' data.numericmatrix(data.frame(a = c("1", "2", "3"), 
#'                               b = c("4", "5", "6")))
#' 
data.numericmatrix <- function(x) {
  for (i in 1:ncol(x)) {
    if (is.character(x[, i])) {
      x[, i] <- as.numeric(as.character(x[, i]))
    }
  }
  as.matrix(x)
}


#########################################################################################
## annotate and process matrix
sparseMatrixToString <- function(matrixRows, matrixCols, matrixVals, parameterSet){
  matrixRows <- c(matrixRows, 1)
  matrixCols <- c(matrixCols, 1)
  matrixVals <- c(matrixVals, serializeParameterSet(parameterSet))
  
  ## TODO performance 25s
  ## convert matrix to dataframe
  numberOfRows    <- max(matrixRows)
  numberOfColumns <- max(matrixCols)
  
  lines <- vector(mode = "character", length = numberOfRows)
  for(rowIdx in seq_len(numberOfRows)){
    indeces <- matrixRows == rowIdx
    tokens  <- vector(mode = "character", length = numberOfColumns)
    tokens[matrixCols[indeces]] <- matrixVals[indeces]
    lines[[rowIdx]] <- paste(tokens, collapse = "\t")
  }
  
  return(lines)
}

#' Read MetFamily Project data saved by the export function
#'
#' Supports reading from plain and gzip'ed files
#' 
#' @param file Path to file to read
#' @param progress Whether to update a shiny Progress bar
#'
#' @return A big dataList. 
#' 
#' @seealso [readProjectData]
#' @export
#'
#' @examples
readClusterDataFromProjectFile <- function(file, progress = FALSE)
{
  if(!is.na(progress))  
    if(progress)  
      setProgress(value = 0, detail = "Parsing") 
  else 
    print("Parsing")
  
  extension <- file_ext(file)
  
  if(extension == "gz") {
    file <- gzfile(file, "r")
  } else {
    file <- file(file, "r")
  }
  
  suppressWarnings(
    fileLines <- readLines(con = file)
  )
  base::close(con = file)
  
  dataList <- readProjectData(fileLines = fileLines, progress = progress)
  fileLines <- NULL
  
  return(dataList)
}


#' Read MetFamily Project data saved by the export function
#'
#' @param fileLines Character vector with content of a project file
#' @param progress Whether to update a shiny Progress bar
#'
#' @return A big dataList. 
#' 
#' @seealso [processMS1data]
#' @export
#'
#' @examples
readProjectData <- function(fileLines, progress = FALSE)
{
  allowedTags <- c("ID")
  allowedTagPrefixes <- c("AnnotationColors=")
  
  ##################################################################################################
  ## parse data
  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0.1, detail = "Preprocessing") 
  else 
    print("Preprocessing")
  
  numberOfRows <- length(fileLines)
  numberOfMS1features <- as.integer(numberOfRows - 3)
  
  ## header
  line1Tokens <- strsplit(x = fileLines[[1]], split = "\t")[[1]]
  line2Tokens <- strsplit(x = fileLines[[2]], split = "\t")[[1]]
  line3Tokens <- strsplit(x = fileLines[[3]], split = "\t")[[1]]
  
  ## metabolite profile vs fragmentMatrix
  numberOfColumns <- length(line1Tokens)
  line1TokensOffset <- 2
  fragmentMatrixStart <- min(which(line1Tokens[(1 + line1TokensOffset):numberOfColumns] != "")) + line1TokensOffset
  numberOfMetaboliteProfileColumns <- fragmentMatrixStart - 1
  numberOfFragmentGroups <- numberOfColumns - numberOfMetaboliteProfileColumns
  
  ## extract infos from header
  importParameters <- line1Tokens[[1]]
  if(nchar(importParameters) == 0){
    ## import parameterSet not there: backward compatibility - add if not there
    importParameters <- "ImportParameters={projectName=MetFamily project; projectDescription=; toolVersion=MetFamily 1.0; minimumIntensityOfMaximalMS2peak=2000; minimumProportionOfMS2peaks=0.05; mzDeviationAbsolute_grouping=0.01; mzDeviationInPPM_grouping=10; doPrecursorDeisotoping=TRUE; mzDeviationAbsolute_precursorDeisotoping=0.001; mzDeviationInPPM_precursorDeisotoping=10; maximumRtDifference=0.02; doMs2PeakGroupDeisotoping=FALSE; mzDeviationAbsolute_ms2PeakGroupDeisotoping=0.01; mzDeviationInPPM_ms2PeakGroupDeisotoping=10; proportionOfMatchingPeaks_ms2PeakGroupDeisotoping=0.9; mzDeviationAbsolute_mapping=0.01; minimumNumberOfMS2PeaksPerGroup=1; neutralLossesPrecursorToFragments=TRUE; neutralLossesFragmentsToFragments=FALSE}"
  }
  groupSampleDataFrameFieldValue <- line1Tokens[[2]]
  
  fragmentGroupsNumberOfFramgents <- as.integer(line1Tokens[fragmentMatrixStart:numberOfColumns])
  line1Tokens <- NULL
  
  tagsSector <- line2Tokens[seq_len(numberOfMetaboliteProfileColumns)]
  fragmentGroupsAverageIntensity <- as.numeric(line2Tokens[fragmentMatrixStart:numberOfColumns])
  line2Tokens <- NULL
  
  metaboliteProfileColumnNames <- line3Tokens[seq_len(numberOfMetaboliteProfileColumns)]
  fragmentGroupsAverageMass <- as.numeric(line3Tokens[fragmentMatrixStart:numberOfColumns])
  line3Tokens <- NULL
  
  if(any(duplicated(metaboliteProfileColumnNames)))
    stop(paste("Duplicated column names in the metabolite profile: ", 
               paste(sort(unique(metaboliteProfileColumnNames[duplicated(metaboliteProfileColumnNames)])), collapse = "; ")))
  
  #########################################################################
  ## extract metabolite profile and fragment matrix
  metaboliteProfile <- as.data.frame(matrix(nrow = numberOfMS1features, 
                                            ncol = numberOfMetaboliteProfileColumns))
  colnames(metaboliteProfile) <- metaboliteProfileColumnNames
  
  listMatrixRows <- list()
  listMatrixCols <- list()
  listMatrixVals <- list()
  
  lastOut <- proc.time()["user.self"]
  lastRow <- 1
  for(rowIdx in seq_len(numberOfMS1features)){
    time <- proc.time()["user.self"]
    if(time - lastOut > 1){
      lastOut <- time
      rowProgress <- (rowIdx - lastRow) / numberOfMS1features
      lastRow <- rowIdx
      if(!is.na(progress))  
        if(progress)  
          incProgress(amount = rowProgress*0.2,     
                      detail = paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = "")) 
      else 
        print(paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = ""))
    }
    
    lineIdx <- rowIdx + 3
    tokens <- str_split(string = fileLines[[lineIdx]], pattern = "\t")[[1]]
    
    ## metabolite profile
    metaboliteProfile[rowIdx, ] <- tokens[seq_len(numberOfMetaboliteProfileColumns)]
    
    ## fragment matrix
    tokens <- tokens[fragmentMatrixStart:numberOfColumns]
    nonEmpty <- tokens != ""
    indeces <- which(nonEmpty)
    numberOfEntries <- length(indeces)
    listMatrixRows[[rowIdx]] <- rep(x = rowIdx, times = numberOfEntries)
    listMatrixCols[[rowIdx]] <- indeces
    listMatrixVals[[rowIdx]] <- tokens[nonEmpty]
  }
  matrixRows <- as.integer(unlist(listMatrixRows))
  matrixCols <- as.integer(unlist(listMatrixCols))
  matrixVals <- as.numeric(unlist(listMatrixVals))
  listMatrixRows <- NULL
  listMatrixCols <- NULL

  ## Disable command line reading of answer
  if (FALSE) {
  ################################################################################
  #Start of importing  annotation part1 from two
  # Display the message and give the user the option to choose whether to upload the annotation file or not. 
  #If Y shows selection window for annotation file. if N ignores annotation process
  #message("Do you want to upload the annotation file? (Y/N)")
  #user_choice <- readline()
  user_choice <- "N"
  
  if (toupper(user_choice) == "Y") {
    

    # Read the annotation_file file (if needed)
    annotation_file <- read.delim(file.choose(), header = TRUE, check.names = FALSE) # select interactively
    
    # Display the available columns in annotation_file
    message("Available columns in annotation_file:")
    available_columns <- colnames(annotation_file)
    for (i in 1:length(available_columns)) {
      message(paste(i, "-", available_columns[i]))
    }
    
    # Prompt the user to select the column containing IDs
    message("Enter the number corresponding to the column containing IDs:")
    selected_column_id <- as.integer(readline())
    
    # Check if the selected column index is valid
    if (selected_column_id >= 1 && selected_column_id <= length(available_columns)) {
      id_column <- available_columns[selected_column_id]
      
      # Prompt the user to select the Annotation column to use
      message("Enter the number corresponding to the annotation column:")
      selected_column_annot <- as.integer(readline())
      
      # Check if the selected column index is valid
      if (selected_column_annot >= 1 && selected_column_annot <= length(available_columns)) {
        selected_column <- available_columns[selected_column_annot]
        
        # Iterate through all values in the "Annotation" column of metaboliteProfile, excluding first row
        for (i in 1:nrow(metaboliteProfile)) {
          # Perform the lookup based on metaboliteProfile's "Alignment ID" column and annotation_file's selected ID column
          matching_indices <- which(annotation_file[[id_column]] == metaboliteProfile$'Alignment ID'[i])
          
          # Check data types and unique values of IDs column in annotation_file
          
          # Check if any matches were found
          if (length(matching_indices) > 0) {
            # Update the specified column (Annotation) in metaboliteProfile with the corresponding value from annotation_file
            metaboliteProfile[i, "Annotation"] <- annotation_file[matching_indices[1], selected_column]
          } else {
            # Handle the case where no match was found (you can add custom logic here)
            warning(paste("No match found for row", i, "in metaboliteProfile"))
          }
        }
      } else {
        message("Invalid column selection. Skipping annotation step.")
      }
    }
  }
  
  #####################################################################################################################################
  #end of importing  annotation part1 from two
  }
  
  listMatrixVals <- NULL
  
  ## header
  dataFrameHeader <- cbind(
    data.frame(rbind(
      c(importParameters, rep(x = "", times = numberOfMetaboliteProfileColumns - 1)),
      tagsSector,
      metaboliteProfileColumnNames), stringsAsFactors = FALSE),
    data.frame(rbind(
      fragmentGroupsNumberOfFramgents,
      fragmentGroupsAverageIntensity,
      fragmentGroupsAverageMass
    ), stringsAsFactors = FALSE)
  )
  headerLabels <- c("HeaderForFragmentCounts", 
                    "HeaderForGroupsAndFragmentIntensities", 
                    "Header")
  rownames(dataFrameHeader) <- headerLabels
  headerColumnNames <- c(metaboliteProfileColumnNames, fragmentGroupsAverageMass)
  colnames(dataFrameHeader) <- headerColumnNames
  
  ## import parameterSet
  importParameterSet <- deserializeParameterSet(importParameters)
  
  ## insert annotation column if not there
  annotationColorsName <- "AnnotationColors"
  annotationColorsMapInitValue <- paste(annotationColorsName, "={}", sep = "")
  annotationColumnName <- "Annotation"
  if(!any(metaboliteProfileColumnNames == annotationColumnName, na.rm = TRUE)){
    ## annotation column backward compatibility - insert if not there
    target <- 2
    
    if(target == 0 | target == numberOfMetaboliteProfileColumns)
      stop("Cannot insert column!")
    
    metaboliteProfile <- cbind(
      metaboliteProfile[,seq_len(target),drop=F], 
      as.data.frame(x = rep(x = "", times = numberOfMS1features), stringsAsFactors = FALSE), 
      metaboliteProfile[, (target+1):numberOfMetaboliteProfileColumns, drop=FALSE]
    )
    dataFrameHeader <- cbind(
      dataFrameHeader[,seq_len(target),drop=F], 
      as.data.frame(x = rep(x = "", times = numberOfMS1features), stringsAsFactors = FALSE), 
      dataFrameHeader[, (target+1):numberOfColumns, drop=FALSE]
    )
    numberOfMetaboliteProfileColumns <- numberOfMetaboliteProfileColumns + 1
    metaboliteProfileColumnNames <- c(metaboliteProfileColumnNames[seq_len(target)], 
                                      annotationColumnName, 
                                      metaboliteProfileColumnNames[(target+1):numberOfMetaboliteProfileColumns])
    colnames(metaboliteProfile) <- metaboliteProfileColumnNames
    headerColumnNames <- c(metaboliteProfileColumnNames, fragmentGroupsAverageMass)
    colnames(dataFrameHeader) <- headerColumnNames
    
    dataFrameHeader[2, target + 1] <- annotationColorsMapInitValue
    dataFrameHeader[3, target + 1] <- annotationColumnName
  }

  ## STN: Disabled. 
  if (FALSE) {
  #Start of importing annotation part2 from two
  ################################################################################
   #adding HEX color codes from external annotations to the annotationColorsMapInitValue of dataFrameHeader
  if (toupper(user_choice) == "Y") {
      # Copy the selected column by user, Remove duplicates and exclude the first row
    uniqueAnnotations <- unique(unlist(strsplit(metaboliteProfile$Annotation, ",")))
    uniqueAnnotations <- paste0(uniqueAnnotations, "=")
    # Add a random string from the hex color list to each element of uniqueAnnotions
    # strings_list <- c("#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#800000", "#008000", "#000080", "#808000", "#800080", "#008080", "#808080", "#C0C0C0", "#FFA500", "#FFC0CB", "#FFD700", "#A52A2A")
    # uniqueAnnotations <- paste0(uniqueAnnotations, sample(strings_list, length(uniqueAnnotations), replace = TRUE))
    allowedCols <- c("blue", "red", "yellow", "green", "brown", "deepskyblue", "orange", "deeppink", "aquamarine", "burlywood", "cadetblue", "coral", "cornflowerblue", "cyan", "firebrick", "goldenrod", "indianred", "khaki", "magenta", "maroon", "beige", "moccasin", "olivedrab", "orangered", "orchid", "paleturquoise3", "rosybrown", "salmon", "seagreen3", "skyblue", "steelblue", "#BF360C", "#33691E", "#311B92", "#880E4F", "#1A237E", "#006064", "#004D40", "#FF6F00", "#E65100")
    uniqueAnnotations <- paste0(uniqueAnnotations, sample(allowedCols, length(uniqueAnnotations), replace = TRUE))
    # Format uniqueAnnotations into a single line with comma-separated values
    uniqueAnnotations1 <- paste(uniqueAnnotations, collapse = ", ")
    #uniqueAnnotationsHexs <- paste("AnnotationColors={", paste(uniqueAnnotations1, collapse = ","), "}")# this line introduces a space after the first Item of the object, therefore, replaced with the following to remove the space
    uniqueAnnotationsHexs <- gsub("AnnotationColors=\\{\\s+", "AnnotationColors={", paste("AnnotationColors={", paste(uniqueAnnotations1, collapse = ","), "}"))
    # Assuming dataFrameHeader is your data frame
    dataFrameHeader$Annotation[2] <- uniqueAnnotationsHexs
  }
################################################################################
#End of importing  annotation part2 from two
  }
  
  annotationColumnIndex <- which(metaboliteProfileColumnNames == annotationColumnName)
  annotationColorsValue <- dataFrameHeader[2, annotationColumnIndex]
  
  dataFrameMS1Header <- dataFrameHeader[, seq_len(numberOfMetaboliteProfileColumns)]
  
  ##################################################################################################
  ## MS1 feature IDs
  
  ## mz/rt is aligned by '.'
  mzs <- metaboliteProfile[, "m/z"]
  rts <- metaboliteProfile[, "RT"]
  
  ## add .0 if necessary
  for(i in seq_len(numberOfMS1features))
    if(length(grep(x = mzs[[i]], pattern = ".*\\..*")) == 0)
      mzs[[i]] <- paste(mzs[[i]], ".0", sep = "")
  for(i in seq_len(numberOfMS1features))
    if(length(grep(x = rts[[i]], pattern = ".*\\..*")) == 0)
      rts[[i]] <- paste(rts[[i]], ".0", sep = "")
  
  regexResult <- gregexpr(pattern = "^(?<before>\\d+)\\.(?<after>\\d+)$", text = mzs, perl = TRUE)
  mzStartsBefore  <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.start")[[1]]}))
  mzStartsAfter   <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.start")[[2]]}))
  mzLengthsBefore <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.length")[[1]]}))
  mzLengthsAfter  <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.length")[[2]]}))
  
  mzMaxBefore <- max(mzLengthsBefore)
  mzMaxAfter  <- max(mzLengthsAfter )
  
  regexResult <- gregexpr(pattern = "^(?<before>\\d+)\\.(?<after>\\d+)$", text = rts, perl = TRUE)
  rtStartsBefore  <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.start")[[1]]}))
  rtStartsAfter   <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.start")[[2]]}))
  rtLengthsBefore <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.length")[[1]]}))
  rtLengthsAfter  <- unlist(lapply(X = regexResult, FUN = function(x){attr(x = x, which = "capture.length")[[2]]}))
  
  rtMaxBefore <- max(rtLengthsBefore)
  rtMaxAfter  <- max(rtLengthsAfter )
  
  
  maximumNumberOfDecimalPlacesForMz <- 3
  maximumNumberOfDecimalPlacesForRt <- 2
  
  for(idx in seq_along(mzs)){
    mzStartBefore  <- mzStartsBefore [[idx]]
    mzStartAfter   <- mzStartsAfter  [[idx]]
    mzLengthBefore <- mzLengthsBefore[[idx]]
    mzLengthAfter  <- mzLengthsAfter [[idx]]
    
    rtStartBefore  <- rtStartsBefore [[idx]]
    rtStartAfter   <- rtStartsAfter  [[idx]]
    rtLengthBefore <- rtLengthsBefore[[idx]]
    rtLengthAfter  <- rtLengthsAfter [[idx]]
    
    mzBefore <- substr(start = mzStartBefore, stop = mzStartBefore + mzLengthBefore - 1, x = mzs[[idx]])
    mzAfter  <- substr(start = mzStartAfter,  stop = mzStartAfter  + mzLengthAfter  - 1, x = mzs[[idx]])
    
    rtBefore <- substr(start = rtStartBefore, stop = rtStartBefore + rtLengthBefore - 1, x = rts[[idx]])
    rtAfter  <- substr(start = rtStartAfter,  stop = rtStartAfter  + rtLengthAfter  - 1, x = rts[[idx]])
    
    if(nchar(mzBefore) < mzMaxBefore) 
      mzBefore <- paste(
        paste(rep(x = "  ", times = mzMaxBefore - nchar(mzBefore)), collapse = ""),
        mzBefore,
        sep = ""
      )
    if(nchar(mzAfter) > maximumNumberOfDecimalPlacesForMz)
      mzAfter <- substr(x = mzAfter, start = 1, stop = maximumNumberOfDecimalPlacesForMz)
    if(nchar(mzAfter) < maximumNumberOfDecimalPlacesForMz)
      mzAfter <- paste(
        mzAfter,
        paste(rep(x = "0", times = maximumNumberOfDecimalPlacesForMz - nchar(mzAfter)), collapse = ""),
        sep = ""
      )
    
    if(nchar(rtBefore) < rtMaxBefore) 
      rtBefore <- paste(
        paste(rep(x = "  ", times = rtMaxBefore - nchar(rtBefore)), collapse = ""),
        rtBefore,
        sep = ""
      )
    if(nchar(rtAfter) > maximumNumberOfDecimalPlacesForRt)
      rtAfter <- substr(x = rtAfter, start = 1, stop = maximumNumberOfDecimalPlacesForRt)
    if(nchar(rtAfter) < maximumNumberOfDecimalPlacesForRt)
      rtAfter <- paste(
        rtAfter,
        paste(rep(x = "0", times = maximumNumberOfDecimalPlacesForRt - nchar(rtAfter)), collapse = ""),
        sep = ""
      )
    
    mzs[[idx]] <- paste(mzBefore, mzAfter, sep = ".")
    rts[[idx]] <- paste(rtBefore, rtAfter, sep = ".")
  }
  precursorLabels <- paste(mzs, rts, sep = " / ")
  
  ## remove duplicated MS1 features
  duplicated <- which(duplicated(precursorLabels))
  numberOfDuplicated <- length(duplicated)
  if(numberOfDuplicated > 0){
    precursorLabels <- precursorLabels[-duplicated]
    metaboliteProfile <- metaboliteProfile[-duplicated, ]
    numberOfMS1features <- numberOfMS1features - numberOfDuplicated
    for(duplicatedRowIdxIdx in seq_along(duplicated)){
      duplicatedRowIdx <- duplicated[[duplicatedRowIdxIdx]]
      
      ## remove row from matrix
      indeces1 <- which(matrixRows == duplicatedRowIdx)
      if(length(indeces1) == 0)
        next
      
      matrixRows <- matrixRows[-indeces1]
      matrixCols <- matrixCols[-indeces1]
      matrixVals <- matrixVals[-indeces1]
      
      ## update subsequent matrix rows
      indeces2 <- which(matrixRows > duplicatedRowIdx)
      matrixRows[indeces2] <- matrixRows[indeces2] - 1
      
      ## update subsequent duplicated rows
      indeces3 <- which(duplicated > duplicatedRowIdx)
      duplicated[indeces3] <- duplicated[indeces3] - 1
    }
  }
  rownames(metaboliteProfile) <- precursorLabels
  
  #############################################################################################
  ## process features
  if(!is.na(progress))  if(progress)  incProgress(amount = 0.1, detail = "Features") else print("Features")
  
  ## get features
  featureIndeces <- list()
  featureCount <- vector(mode = "numeric", length = numberOfMS1features)
  
  for(i in seq_len(numberOfMS1features)){
    indecesHere <- which(matrixRows == i)
    featureIndecesHere <- matrixCols[indecesHere]
    numberOfFeatures <- length(featureIndecesHere)
    
    featureIndeces[[i]] <- featureIndecesHere
    featureCount[[i]] <- numberOfFeatures
  }
  
  if(!is.na(progress))  if(progress)  incProgress(amount = 0.1, detail = "Feature postprocessing") else print("Feature postprocessing")
  
  ## ms2 plot data
  ms2PlotDataNumberOfFragments <- fragmentGroupsNumberOfFramgents
  ms2PlotDataAverageAbundance  <- fragmentGroupsAverageIntensity
  ms2PlotDataFragmentMasses    <- fragmentGroupsAverageMass
  maxNumberOfFragments <- max(ms2PlotDataNumberOfFragments)
  ms2PlotDataColorMapFragmentData  <- makecmap(
    x = c(0, maxNumberOfFragments), n = 100, 
    colFn = colorRampPalette(c('grey', 'black'))
  )
  
  ## featureMatrix and annotation
  featureMatrix <- sparseMatrix(i = matrixRows, j = matrixCols, x = matrixVals)
  matrixRows <- NULL
  matrixCols <- NULL
  matrixVals <- NULL
  
  rownames(featureMatrix) <- precursorLabels
  colnames(featureMatrix) <- fragmentGroupsAverageMass
  
  ## featureIndexMatrix
  featureIndexMatrix <- matrix(nrow = numberOfMS1features, ncol = max(sapply(X = featureIndeces, FUN = length)))
  rownames(featureIndexMatrix) <- precursorLabels
  for(i in seq_len(numberOfMS1features))
    featureIndexMatrix[i, seq_len(length(featureIndeces[[i]]))] <- featureIndeces[[i]]
  
  minimumMass <- min(fragmentGroupsAverageMass)
  maximumMass <- max(fragmentGroupsAverageMass)
  
  ##################################################################################################
  ## process sample measurements
  
  ## sample columns
  sampleColumns <- tagsSector != ""
  for(allowedTag in allowedTags)
    sampleColumns[grep(x = tagsSector, pattern = paste("^", allowedTag, "$", sep = ""))] <- FALSE
  for(allowedTagPrefix in allowedTagPrefixes)
    sampleColumns[grep(x = tagsSector, pattern = paste("^", allowedTagPrefix, sep = ""))] <- FALSE
  sampleColumns <- which(sampleColumns)
  sampleColumnsStartEnd <- c(min(sampleColumns), max(sampleColumns))
  
  grouXXXps <- unique(tagsSector[sampleColumns])
  numberOfGroups <- length(grouXXXps)
  
  sampleNamesToExclude <- NULL
  
  
  dataColumnIndecesFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude = NULL){
    which(tagsSector == grouXXXps[[groupIdx]] & !(metaboliteProfileColumnNames %in% sampleNamesToExclude))
  }
  dataColumnsNameFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude = NULL){
    sampleNames = metaboliteProfileColumnNames[dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)]
    return(sampleNames)
  }
  dataColumnsNameFunctionFromGroupName <- function(group, sampleNamesToExclude = NULL){
    dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = grouXXXps), sampleNamesToExclude = sampleNamesToExclude)
  }
  dataColumnsNameFunctionFromGroupNames <- function(grouXXXps, sampleNamesToExclude = NULL){
    unlist(lapply(X = grouXXXps, FUN = function(x){dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)}))
  }
  groupNameFunctionFromDataColumnName <- function(dataColumnName, sampleNamesToExclude = NULL){
    groupIdx <- which(unlist(lapply(X = grouXXXps, FUN = function(x){
      dataColumnNames <- dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)
      any(dataColumnNames == dataColumnName)
    })))
    grouXXXps[[groupIdx]]
  }
  lfcColumnNameFunctionFromString <- function(columnName){
    tokens <- strsplit(x = columnName, split = "_vs_")[[1]]
    groupOne <- strsplit(x = tokens[[1]], split = "LFC_")[[1]][[2]]
    groupTwo <- tokens[length(tokens)]
    return(c(groupOne, groupTwo))
  }
  dataMeanColumnNameFunctionFromString <- function(columnName){
    group <- substr(x = columnName, start = 1, stop = nchar(columnName) - nchar("_mean"))
    return(group)
  }
  
  ## manage group and samples: order and exclusion
  groupNames  <- tagsSector[sampleColumns]
  sampleNames <- metaboliteProfileColumnNames[sampleColumns]
  
  if(nchar(groupSampleDataFrameFieldValue) == 0){
    ## not there: backward compatibility - add if not there
    groupSampleDataFrame <- data.frame(stringsAsFactors = FALSE, 
                                       "Group"   = groupNames,
                                       "Sample"  = sampleNames,
                                       "Order"   = seq_along(sampleNames),
                                       "Exclude" = rep(x = FALSE, times = length(sampleNames))
    )
  } else {
    groupSampleDataFrame <- deserializeSampleSelectionAndOrder(groupSampleDataFrameFieldValue)
  }
  dataFrameMS1Header[[1,2]] <- serializeSampleSelectionAndOrder(groupSampleDataFrame)
  
  returnObj <- processMS1data(
    sampleNamesToExclude=sampleNamesToExclude, numberOfMS1features=numberOfMS1features, precursorLabels=precursorLabels, 
    grouXXXps=grouXXXps, metaboliteProfileColumnNames=metaboliteProfileColumnNames, tagsSector = tagsSector, 
    dataColumnIndecesFunctionFromGroupIndex=dataColumnIndecesFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupIndex=dataColumnsNameFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupName=dataColumnsNameFunctionFromGroupName, dataColumnsNameFunctionFromGroupNames=dataColumnsNameFunctionFromGroupNames, groupNameFunctionFromDataColumnName=groupNameFunctionFromDataColumnName,
    metaboliteProfile=metaboliteProfile, progress=progress
  )
  
  ## name functions
  dataMeanColumnNameFunctionFromIndex <- returnObj$dataMeanColumnNameFunctionFromIndex
  dataMeanColumnNameFunctionFromName  <- returnObj$dataMeanColumnNameFunctionFromName
  lfcColumnNameFunctionFromIndex      <- returnObj$lfcColumnNameFunctionFromIndex
  lfcColumnNameFunctionFromName       <- returnObj$lfcColumnNameFunctionFromName
  groupNameFromGroupIndex             <- returnObj$groupNameFromGroupIndex
  groupIdxFromGroupName               <- returnObj$groupIdxFromGroupName
  ## data and names
  dataFrameMeasurements               <- returnObj$dataFrameMeasurements
  ## colors
  colorMatrixDataFrame                <- returnObj$colorMatrixDataFrame
  colorMapAbsoluteData                <- returnObj$colorMapAbsoluteData
  colorMapLogFoldChange               <- returnObj$colorMapLogFoldChange
  columnGroupLabels                   <- returnObj$columnGroupLabels
  ## constants
  meanAllMax       <- returnObj$meanAllMax
  logFoldChangeMax <- returnObj$logFoldChangeMax
  logAbsMax        <- returnObj$logAbsMax
  
  #########################################################################################
  ## precursor annotation fields
  if(!is.na(progress))  if(progress)  incProgress(amount = 0.1, detail = "Feature annotations") else print("Feature annotations")
  annotationValueIgnore <- "Ignore"
  annotationColorIgnore <- "red"
  
  ## present annotations
  annotations    <- vector(mode='list', length=numberOfMS1features)
  annoVals <- metaboliteProfile[, annotationColumnName]
  for(i in seq_len(numberOfMS1features)){
    if(nchar(annoVals[[i]]) > 0){
      annotations[[i]] <- as.list(unlist(strsplit(x = annoVals[[i]], split = ", ")))
    }
    else{
      annotations[[i]] <- list()
    }
  }
  
  annoArrayOfLists    <- vector(mode='list', length=numberOfMS1features)
  annoArrayIsArtifact <- vector(mode='logical', length=numberOfMS1features)
  for(i in seq_len(numberOfMS1features)){
    ignoreCheck <- annotations[[i]] == annotationValueIgnore
    ignoreThere <- any(ignoreCheck)
    
    if(ignoreThere){
      idx <- which(ignoreCheck)
      annotations[[i]] <- annotations[[i]][-idx]
    }
    annoArrayOfLists[[i]]    <- annotations[[i]]
    annoArrayIsArtifact[[i]] <- ignoreThere
  }
  
  ## present annos with colors
  annotationColorsMapValue <- substr(
    x = annotationColorsValue, 
    start = nchar(paste(annotationColorsName, "={", sep = "")) + 1, 
    stop = nchar(annotationColorsValue) - nchar("}")
  )
  
  if(nchar(annotationColorsMapValue) > 0){

    annotationColorsMapValuePairs <- unlist(strsplit(x = annotationColorsMapValue, split = ", "))
    annotationColorsMapValues <- unlist(strsplit(x = annotationColorsMapValuePairs, split = "="))
    annotationColorsMapKeys   <- annotationColorsMapValues[seq(from = 1, to = length(annotationColorsMapValues), by = 2)]
    annotationColorsMapValues <- annotationColorsMapValues[seq(from = 2, to = length(annotationColorsMapValues), by = 2)]
  } else {
    annotationColorsMapKeys <- NULL
    annotationColorsMapValues <- NULL
  }
  
  annoPresentAnnotationsList <- list()
  annoPresentColorsList <- list()
  annoPresentAnnotationsList[[1]] <- annotationValueIgnore
  annoPresentColorsList[[1]] <- annotationColorIgnore
  if(length(annotationColorsMapKeys) > 0)
    for(i in seq_len(length(annotationColorsMapKeys))){
      annoPresentAnnotationsList[[1 + i]] <- annotationColorsMapKeys[[i]]
      annoPresentColorsList     [[1 + i]] <- annotationColorsMapValues[[i]]
    }
  
  ## check consistency
  if(!all(unique(unlist(annoArrayOfLists)) %in% unlist(annoPresentAnnotationsList))){
    missing <- unique(unlist(annoArrayOfLists))[!(unique(unlist(annoArrayOfLists)) %in% unlist(annoPresentAnnotationsList))]
    stop(paste("Annotation(s)", paste(missing, collapse = "; "), "missing in present annotations list"))
  }
  if(!all(unlist(annoPresentAnnotationsList) %in% unique(c(annotationValueIgnore, unlist(annoArrayOfLists))))){
    missing <- unlist(annoPresentAnnotationsList)[!(unlist(annoPresentAnnotationsList) %in% unique(c(annotationValueIgnore, unlist(annoArrayOfLists))))]
    stop(paste("Present annotation(s)", paste(missing, collapse = "; "), "missing in annotations"))
  }
  
  ##################################################################################################
  ## box
  if(!is.na(progress))  if(progress)  incProgress(amount = 0.1, detail = "Boxing") else print("Boxing")
  dataList <- list()
  ## data
  dataList$dataFrameHeader <- dataFrameHeader
  dataList$dataFrameMS1Header <- dataFrameMS1Header
  dataList$dataFrameInfos <- metaboliteProfile
  dataList$importParameterSet <- importParameterSet
  dataList$numberOfPrecursors <- numberOfMS1features
  dataList$numberOfDuplicatedPrecursors <- numberOfDuplicated
  dataList$grouXXXps <- grouXXXps
  dataList$columnGroupLabels <- columnGroupLabels
  dataList$groupSampleDataFrame <- groupSampleDataFrame
  dataList$metaboliteProfileColumnNames <- metaboliteProfileColumnNames
  dataList$tagsSector <- tagsSector
  ## data: fragments
  dataList$fragmentMasses <- fragmentGroupsAverageMass
  dataList$fragmentFrequency <- fragmentGroupsNumberOfFramgents
  dataList$fragmentAbundance <- fragmentGroupsAverageIntensity
  dataList$minimumMass <- minimumMass
  dataList$maximumMass <- maximumMass
  dataList$precursorLabels <- precursorLabels
  ## data: abundancies
  dataList$dataFrameMeasurements <- dataFrameMeasurements
  dataList$meanAllMax <- meanAllMax
  dataList$logFoldChangeMax <- logFoldChangeMax
  dataList$logAbsMax <- logAbsMax
  dataList$colorMatrixDataFrame <- colorMatrixDataFrame
  dataList$colorMapAbsoluteData <- colorMapAbsoluteData
  dataList$colorMapLogFoldChange <- colorMapLogFoldChange
  ## data: column name functions
  dataList$dataColumnsNameFunctionFromGroupName <- dataColumnsNameFunctionFromGroupName
  dataList$dataColumnsNameFunctionFromGroupIndex <- dataColumnsNameFunctionFromGroupIndex
  dataList$dataColumnsNameFunctionFromGroupNames <- dataColumnsNameFunctionFromGroupNames
  dataList$groupNameFunctionFromDataColumnName <- groupNameFunctionFromDataColumnName
  dataList$lfcColumnNameFunctionFromString <- lfcColumnNameFunctionFromString
  dataList$dataMeanColumnNameFunctionFromString <- dataMeanColumnNameFunctionFromString
  dataList$dataColumnIndecesFunctionFromGroupIndex <- dataColumnIndecesFunctionFromGroupIndex
  dataList$dataMeanColumnNameFunctionFromName <- dataMeanColumnNameFunctionFromName
  dataList$dataMeanColumnNameFunctionFromIndex <- dataMeanColumnNameFunctionFromIndex
  dataList$lfcColumnNameFunctionFromName <- lfcColumnNameFunctionFromName
  dataList$lfcColumnNameFunctionFromIndex <- lfcColumnNameFunctionFromIndex
  dataList$groupNameFromGroupIndex <- groupNameFromGroupIndex
  dataList$groupIdxFromGroupName <- groupIdxFromGroupName
  ## features
  dataList$featureMatrix <- featureMatrix
  dataList$featureIndeces <- featureIndeces
  dataList$featureCount <- featureCount
  dataList$featureIndexMatrix <- featureIndexMatrix
  ## ms2 plot data
  dataList$ms2_numberOfFragments <- ms2PlotDataNumberOfFragments
  dataList$ms2_averageAbundance  <- ms2PlotDataAverageAbundance
  dataList$ms2_masses            <- ms2PlotDataFragmentMasses
  dataList$colorMapFragmentData <- ms2PlotDataColorMapFragmentData
  ## annotations
  dataList$annotationColumnName <- annotationColumnName
  dataList$annotationColorsName <- annotationColorsName
  dataList$annotationColumnIndex <- annotationColumnIndex
  dataList$annotationValueIgnore <- annotationValueIgnore
  dataList$annotationColorIgnore <- annotationColorIgnore
  dataList$annoArrayOfLists <- annoArrayOfLists
  dataList$annoArrayIsArtifact <- annoArrayIsArtifact
  dataList$annoPresentAnnotationsList <- annoPresentAnnotationsList
  dataList$annoPresentColorsList <- annoPresentColorsList
  
  if(!is.na(progress))  if(progress)  setProgress(1) else print("Ready")
  
  
  ## redefine MS1 column functions
  dataColumnIndecesFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude){
    which(dataList$tagsSector == dataList$grouXXXps[[groupIdx]] & !(dataList$metaboliteProfileColumnNames %in% sampleNamesToExclude))
  }
  dataList$dataColumnIndecesFunctionFromGroupIndex <- dataColumnIndecesFunctionFromGroupIndex
  dataColumnsNameFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude){
    dataList$metaboliteProfileColumnNames[dataList$dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)]
  }
  dataList$dataColumnsNameFunctionFromGroupIndex <- dataColumnsNameFunctionFromGroupIndex
  dataColumnsNameFunctionFromGroupName <- function(group, sampleNamesToExclude){
    dataColumns <- dataList$dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = dataList$grouXXXps), sampleNamesToExclude = sampleNamesToExclude)
  }
  dataList$dataColumnsNameFunctionFromGroupName <- dataColumnsNameFunctionFromGroupName
  dataColumnsNameFunctionFromGroupNames <- function(grouXXXps, sampleNamesToExclude){
    unlist(lapply(X = grouXXXps, FUN = function(x){
      dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)
    }))
  }
  dataList$dataColumnsNameFunctionFromGroupNames <- dataColumnsNameFunctionFromGroupNames
  groupNameFunctionFromDataColumnName <- function(dataColumnName, sampleNamesToExclude){
    groupIdx <- which(unlist(lapply(X = dataList$grouXXXps, FUN = function(x){
      dataColumnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)
      any(dataColumnNames == dataColumnName)
    })))
    dataList$grouXXXps[[groupIdx]]
  }
  dataList$groupNameFunctionFromDataColumnName <- groupNameFunctionFromDataColumnName
  
  orderColumnNames <- function(groupSampleDataFrame, columnNames){
    order <- groupSampleDataFrame[groupSampleDataFrame[, "Sample"] %in% columnNames, "Order"]
    list <- list()
    list[order] <- columnNames
    columnNames <- unlist(list)
    return(columnNames)
  }
  dataList$orderColumnNames <- orderColumnNames
  
  ## define sample in-/exclusion functions
  excludedSamples <- function(groupSampleDataFrame, grouXXXps = dataList$grouXXXps){
    samples    =  groupSampleDataFrame[, "Sample"]
    isExcluded =  groupSampleDataFrame[, "Exclude"]
    isGroup    =  groupSampleDataFrame[, "Group"] %in% grouXXXps
    return(samples[isExcluded & isGroup])
  }
  dataList$excludedSamples <- excludedSamples
  includedSamples <- function(groupSampleDataFrame, grouXXXps = dataList$grouXXXps){
    samples    =  groupSampleDataFrame[, "Sample"]
    isIncluded = !groupSampleDataFrame[, "Exclude"]
    isGroup    =  groupSampleDataFrame[, "Group"] %in% grouXXXps
    return(samples[isIncluded & isGroup])
  }
  dataList$includedSamples <- includedSamples
  
  includedGroups <- function(groupSampleDataFrame, samples = dataList$groupSampleDataFrame[, "Sample"]){
    unique(unlist(lapply(X = intersect(samples, dataList$includedSamples(groupSampleDataFrame)), FUN = function(sampleName){
      dataList$groupNameFunctionFromDataColumnName(dataColumnName = sampleName, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))
    })))
  }
  dataList$includedGroups <- includedGroups
  excludedGroups <- function(groupSampleDataFrame, samples = dataList$groupSampleDataFrame[, "Sample"]){
    setdiff(dataList$grouXXXps, dataList$includedGroups(groupSampleDataFrame, samples)) 
  }
  dataList$excludedGroups <- excludedGroups
  
  return(dataList)
}

#' Process MS-Dial-like MS1 data.frame
#' 
#' Processing of MS-Dial-like MS1 data.frame. Includes calculation 
#' of MS1 data mean and log-fold-change (LFC) data
#'
#' @param sampleNamesToExclude 
#' @param numberOfMS1features 
#' @param precursorLabels 
#' @param grouXXXps 
#' @param metaboliteProfileColumnNames 
#' @param dataColumnIndecesFunctionFromGroupIndex 
#' @param dataColumnsNameFunctionFromGroupIndex 
#' @param dataColumnsNameFunctionFromGroupName 
#' @param dataColumnsNameFunctionFromGroupNames 
#' @param groupNameFunctionFromDataColumnName 
#' @param tagsSector 
#' @param metaboliteProfile 
#' @param progress 
#'
#' @return
#' @export
#' @importFrom grDevices colorRampPalette rainbow
#'
#' @examples
processMS1data <- function(sampleNamesToExclude, 
                           numberOfMS1features, 
                           precursorLabels, 
                           grouXXXps, 
                           metaboliteProfileColumnNames, 
                           dataColumnIndecesFunctionFromGroupIndex, 
                           dataColumnsNameFunctionFromGroupIndex, 
                           dataColumnsNameFunctionFromGroupName, 
                           dataColumnsNameFunctionFromGroupNames, 
                           groupNameFunctionFromDataColumnName,
                           tagsSector, 
                           metaboliteProfile, 
                           progress=FALSE)
{
  numberOfGroups <- length(grouXXXps)
  
  ####################
  ## MS1 measurement data: mean and LFC
  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0.1, detail = "Coloring") 
  else 
    print("Coloring")

  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0, detail = "Coloring init") 
  else 
    print("Coloring init")
  
  dataFrameMeasurements <- data.frame(matrix(nrow = numberOfMS1features, ncol = 0))
  rownames(dataFrameMeasurements) <- precursorLabels
  
  ## column name functions
  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0, detail = "Coloring naming functions") 
  else 
    print("Coloring naming functions")
  
  ## store data of grouXXXps
  dataColumnNames <- list()
  for(groupIdx in seq_len(numberOfGroups)){
    dataColumnNamesHere <- dataColumnsNameFunctionFromGroupIndex(groupIdx = groupIdx, 
                                                                 sampleNamesToExclude = sampleNamesToExclude)
    dataColumnNames <- c(dataColumnNames, dataColumnNamesHere)
    dataFrameMeasurements[, dataColumnNamesHere] <- data.numericmatrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, 
                                                                                                                                   sampleNamesToExclude = sampleNamesToExclude), 
                                                                                         drop = FALSE])
  }
  dataColumnNames <- unlist(dataColumnNames)
  
  dataMeanColumnNameFunctionFromName  <- function(group){
    return(paste(group, "_mean", sep = ""))
  }
  
  dataMeanColumnNameFunctionFromIndex  <- function(groupIdx){
    return(dataMeanColumnNameFunctionFromName(grouXXXps[[groupIdx]]))
  }
  
  lfcColumnNameFunctionFromName <- function(groupOne, groupTwo){
    return(paste("LFC", groupOne, "vs", groupTwo, sep = "_"))
  }
  
  lfcColumnNameFunctionFromIndex <- function(groupIdxOne, groupIdxTwo){
    lfcColumnNameFunctionFromName(grouXXXps[[groupIdxOne]], grouXXXps[[groupIdxTwo]])
  }
  
  groupNameFromGroupIndex <- function(groupIdx){
    return(grouXXXps[[groupIdx]])
  }
  
  groupIdxFromGroupName <- function(group){
    return(match(x = group, table = grouXXXps))
  }
  
  if(!is.na(progress))  
    if(progress)  incProgress(amount = 0, detail = "Coloring gather data") 
  else 
    print("Coloring gather data")
  
  ## mean data columns
  dataMeanColumnNames <- list()
  for(groupIdx in seq_len(numberOfGroups)){
    dataMeanColumnName <- dataMeanColumnNameFunctionFromIndex(groupIdx)
    dataMeanColumnNames[[groupIdx]] <- dataMeanColumnName
    if(is(unlist(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)]),"character"))
      for(colIdx in dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude))
        metaboliteProfile[, colIdx] <- as.numeric(metaboliteProfile[, colIdx])
    
    dataFrameMeasurements[, dataMeanColumnName] <- apply(X = data.numericmatrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude), drop=FALSE]), MARGIN = 1, FUN = mean)
    dataFrameMeasurements[is.na(dataFrameMeasurements[, dataMeanColumnName]), dataMeanColumnName] <- 0
  }
  dataMeanColumnNames <- unlist(dataMeanColumnNames)
  
  ## all replicates mean
  dataFrameMeasurements[, "meanAllNormed"] <- apply(
    X = data.numericmatrix(metaboliteProfile[, 
                                             unlist(lapply(X = seq_len(numberOfGroups), FUN = function(x) {dataColumnIndecesFunctionFromGroupIndex(groupIdx = x, sampleNamesToExclude = sampleNamesToExclude)})),
                                             drop=FALSE]), 
    MARGIN = 1, FUN = mean
  )
  
  meanAllMax <- max(dataFrameMeasurements[, "meanAllNormed"])
  if(meanAllMax != 0)
    dataFrameMeasurements[, "meanAllNormed"] <- dataFrameMeasurements[, "meanAllNormed"] / meanAllMax
  
  ## log fold change between grouXXXps
  lfcColumnNames <- list()
  for(groupIdx1 in seq_len(numberOfGroups))
    for(groupIdx2 in seq_len(numberOfGroups)){
      lfcColumnName <- lfcColumnNameFunctionFromIndex(groupIdx1, groupIdx2)
      lfcColumnNames[[length(lfcColumnNames) + 1]] <- lfcColumnName
      dataFrameMeasurements[, lfcColumnName] <- log(
        x = dataFrameMeasurements[, dataMeanColumnNameFunctionFromIndex(groupIdx1)] / dataFrameMeasurements[, dataMeanColumnNameFunctionFromIndex(groupIdx2)], 
        base = 2
      )
      
      ## tackle zero values
      dataFrameMeasurements[is.na(dataFrameMeasurements[, lfcColumnName]), lfcColumnName] <- 0
      dataFrameMeasurements[is.infinite(dataFrameMeasurements[, lfcColumnName]), lfcColumnName] <- 0
    }
  lfcColumnNames <- unlist(lfcColumnNames)
  
  
  #########################################################################################
  ## MS1 measurement data to colors
  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0, detail = "Coloring matrix") 
  else 
    print("Coloring matrix")
  
  matrixDataFrame <- data.numericmatrix(dataFrameMeasurements)
  
  matrixDataFrame[, dataColumnNames    ][matrixDataFrame[, dataColumnNames    ] < 1] <- 1
  matrixDataFrame[, dataMeanColumnNames][matrixDataFrame[, dataMeanColumnNames] < 1] <- 1

  matrixDataFrame[, dataColumnNames]     <- log10(matrixDataFrame[, dataColumnNames])
  matrixDataFrame[, dataMeanColumnNames] <- log10(matrixDataFrame[, dataMeanColumnNames])
  matrixDataFrame[is.infinite(matrixDataFrame)] <- 0

  ## min / max
  logAbsMin <- min(0, min(matrixDataFrame[, dataMeanColumnNames]))
  logAbsMax <- max(matrixDataFrame[, c(dataColumnNames, dataMeanColumnNames)])
  logFoldChangeMinMax <- c(min(matrixDataFrame[, lfcColumnNames]), max(matrixDataFrame[, lfcColumnNames]))
  logFoldChangeMax <- max(abs(logFoldChangeMinMax))
  if(logFoldChangeMax < 1)
    logFoldChangeMax <- 1
  
  ## maps
  colorMapAbsoluteData  <- makecmap(
    x = c(logAbsMin, logAbsMax), n = 100, 
    colFn = colorRampPalette(rainbow(18)[10:1])
  )
  colorMapLogFoldChange <- makecmap(
    x = c(-logFoldChangeMax, logFoldChangeMax), n = 100, 
    colFn = colorRampPalette(c('blue', 'white', 'red'))
  )
  
  columnGroupLabels <- sapply(X = grouXXXps, 
                              FUN = function(x){ 
                                rep(x = x, 
                                    times = length(dataColumnsNameFunctionFromGroupName(group = x, 
                                                                                        sampleNamesToExclude = sampleNamesToExclude))) 
                              })
  
  ## translate and box colors
  if(!is.na(progress))  
    if(progress)  
      incProgress(amount = 0, detail = "Coloring box") 
  else 
    print("Coloring box")
  
  colorDataFrame <- dataFrameMeasurements
  colorDataFrame[, dataColumnNames    ] <- cmap(x = matrixDataFrame[, dataColumnNames    ], map = colorMapAbsoluteData)
  colorDataFrame[, dataMeanColumnNames] <- cmap(x = matrixDataFrame[, dataMeanColumnNames], map = colorMapAbsoluteData)
  colorDataFrame[, lfcColumnNames]      <- cmap(x = matrixDataFrame[, lfcColumnNames     ], map = colorMapLogFoldChange)
  colorMatrixDataFrame <- as.matrix(colorDataFrame)
  
  returnObj <- list(
    ## name functions
    dataMeanColumnNameFunctionFromIndex=dataMeanColumnNameFunctionFromIndex,
    dataMeanColumnNameFunctionFromName=dataMeanColumnNameFunctionFromName,
    lfcColumnNameFunctionFromIndex=lfcColumnNameFunctionFromIndex,
    lfcColumnNameFunctionFromName=lfcColumnNameFunctionFromName,
    groupNameFromGroupIndex=groupNameFromGroupIndex,
    groupIdxFromGroupName=groupIdxFromGroupName,
    ## data and names
    dataFrameMeasurements=dataFrameMeasurements,
    ## colors
    colorMatrixDataFrame=colorMatrixDataFrame,
    colorMapAbsoluteData=colorMapAbsoluteData,
    colorMapLogFoldChange=colorMapLogFoldChange,
    columnGroupLabels=columnGroupLabels,
    ## constants
    meanAllMax=meanAllMax,
    logFoldChangeMax=logFoldChangeMax,
    logAbsMax=logAbsMax
  )
}

serializeSampleSelectionAndOrder <- function(groupSampleDataFrame)
{
  ## wrap columns
  columnsSerialized <- sapply(X = seq_len(ncol(groupSampleDataFrame)), FUN = function(colIdx){
    cellContent <- paste(groupSampleDataFrame[, colIdx], collapse = "; ")
    paste(colnames(groupSampleDataFrame)[[colIdx]], "=", "(", cellContent, ")", sep = "")
  })
  ## box
  groupSampleDataFrameName <- "SampleSelectionAndOrder"
  groupSampleDataFrameValue <- paste(columnsSerialized, collapse = "|")
  groupSampleDataFrameFieldValue <- paste(groupSampleDataFrameName, "=", "{", groupSampleDataFrameValue, "}", sep = "")
  
  return(groupSampleDataFrameFieldValue)
}

deserializeSampleSelectionAndOrder <- function(groupSampleDataFrameFieldValue){
  ## unbox
  groupSampleDataFrameName <- "SampleSelectionAndOrder"
  groupSampleDataFrameValue <- substr(
    x = groupSampleDataFrameFieldValue, 
    start = nchar(paste(groupSampleDataFrameName, "={", sep = "")) + 1, 
    stop = nchar(groupSampleDataFrameFieldValue) - nchar("}")
  )
  
  ## unwrap
  columnsSerialized <- strsplit(x = groupSampleDataFrameValue, split = "\\|")[[1]]
  columnNames = unlist(lapply(X = strsplit(x = columnsSerialized, split = "="), FUN = function(x){
    x[[1]]
  }))
  groupSampleDataFrame <- as.data.frame(stringsAsFactors = FALSE, lapply(X = strsplit(x = columnsSerialized, split = "="), FUN = function(x){
    cellContent <- x[[2]]
    cellContent <- substr(
      x = cellContent, 
      start = 1 + nchar("("), 
      stop = nchar(cellContent) - nchar(")")
    )
    cellContent <- strsplit(x = cellContent, split = "; ")
  }))
  colnames(groupSampleDataFrame) <- columnNames
  
  groupSampleDataFrame[, "Order"]   <- as.integer(groupSampleDataFrame[, "Order"])
  groupSampleDataFrame[, "Exclude"] <- as.logical(groupSampleDataFrame[, "Exclude"])
  
  return(groupSampleDataFrame)
}

serializeParameterSetFile <- function(importParameterSet, toolName, toolVersion){
  ## wrap
  importParametersValue <- paste(names(importParameterSet), importParameterSet, sep = "=", collapse = "\n")
  ## box
  comment <- paste(
    "# This is the set of parameters which have been used for the initial data import to ",
    importParameterSet$toolVersion,
    "\n",
    "# Exported with ",
    toolName, " ", toolVersion,
    sep = ""
  )
  importParametersFileValue <- paste(comment, importParametersValue, sep = "\n")
  return(importParametersFileValue)
}

deserializeParameterSetFile <- function(importParametersFileContent){
  ## remove comments
  importParametersValuePairs <- importParametersFileContent[-grep(pattern = "#.*", x = importParametersFileContent)]
  ## deserialize
  importParameterSet <- deserializeParameterSetKeyValuePairs(importParametersValuePairs)
  return(importParameterSet)
}

serializeParameterSet <- function(importParameterSet){
  ## wrap
  importParametersValue <- paste(names(importParameterSet), importParameterSet, sep = "=", collapse = "; ")
  ## box
  importParametersName <- "ImportParameters"
  importParametersFieldValue <- paste(importParametersName, "={", importParametersValue, "}", sep = "")
  return(importParametersFieldValue)
}

deserializeParameterSet <- function(importParametersFieldValue){
  ## unbox
  importParametersName <- "ImportParameters"
  importParametersValue <- substr(
    x = importParametersFieldValue, 
    start = nchar(paste(importParametersName, "={", sep = "")) + 1, 
    stop = nchar(importParametersFieldValue) - nchar("}")
  )
  
  ## unwrap
  importParametersValuePairs <- unlist(strsplit(x = importParametersValue, split = "; "))
  importParameterSet <- deserializeParameterSetKeyValuePairs(importParametersValuePairs)
  return(importParameterSet)
}

deserializeParameterSetKeyValuePairs <- function(importParametersValuePairs){
  ## unwrap
  importParametersValuePairsList <- strsplit(x = importParametersValuePairs, split = "=")
  
  ## catch empty parameter values
  for(i in seq_len(length(importParametersValuePairsList)))
    if(length(importParametersValuePairsList[[i]]) == 1)
      importParametersValuePairsList[[i]][[2]] <- ""
    
    ## split
    importParametersValues <- unlist(importParametersValuePairsList)
    importParametersKeys   <- importParametersValues[seq(from = 1, to = length(importParametersValues), by = 2)]
    importParametersValues <- importParametersValues[seq(from = 2, to = length(importParametersValues), by = 2)]
    
    ## box to list
    importParameterSet        <- as.list(importParametersValues)
    names(importParameterSet) <- importParametersKeys
    
    ## cast logical's and numeric's
    importParameterSet <- castListEntries(importParameterSet)
    return(importParameterSet)
}

#' Cast logical's and numeric's in a list or data.frame
#'
#' Tries to cast a list entry (or column in a data.frame) to logical's, 
#' if that does not create any missing values, it is assumed 
#' to be a logical will be replaced by `as.logical()` conversion.
#' Similarly for numeric entries (or columns). Everything else remains strings
#' 
#' @param list 
#'
#' @return list of the same lenght with logical's and numeric's casted
#' @export
#'
#' @examples
castListEntries <- function(list){
  ## cast logical's and numeric's
  suppressWarnings(
    for(idx in seq_len(length(list))){
      if(!is.na(as.logical(list[[idx]]))){
        ## logical
        list[[idx]] <- as.logical(list[[idx]])
      } else if(!is.na(as.numeric(list[[idx]]))){
        ## numeric
        list[[idx]] <- as.numeric(list[[idx]])
      } else {
        ## string
      }
    }
  )
  
  return(list)
}

#########################################################################################
## annotation stuff
precursorSetToSetOfAnnotationSets <- function(dataList, precursorSet){
  setOfAnnotationSets <- lapply(X = precursorSet, FUN = function(x){
    annotationSet <- dataList$annoArrayOfLists[[x]]
    if(dataList$annoArrayIsArtifact[[x]])
      annotationSet <- c(annotationSet, "Ignore")
    return(unlist(annotationSet))
  })
  return(setOfAnnotationSets)
}
setOfAnnotationSetsToSetOfColorSets <- function(dataList, setOfAnnotationSets){
  setOfColorSets <- lapply(X = setOfAnnotationSets, FUN = function(x){
    if(is.null(x))
      ## no annotation
      colors <- "black"
    else
      ## at least one annotation
      colors <- unlist(lapply(X = x, FUN = function(y){
        dataList$annoPresentColorsList[[match(x = y, table = dataList$annoPresentAnnotationsList)]] }
      ))
    
    return(colors)
  })
  return(setOfColorSets)
}
getPrecursorColors <- function(dataList, precursorSet){
  setOfAnnotationSets <- precursorSetToSetOfAnnotationSets(dataList, precursorSet)
  setOfColorSets      <- setOfAnnotationSetsToSetOfColorSets(dataList, setOfAnnotationSets)
  setOfColors <- lapply(X = setOfColorSets, FUN = function(x){
    if(any(x == "red"))
      ## at least one annotation is ignore --> take ignore
      color <- "red"
    else{
      switch(as.character(length(x)),
             "0"={## no annotation
               color <- "black"
             },
             "1"={## one annotation
               color <- x
             },
             {## multiple annotations --> take the one which is primary
               color <- x[[1]]
             }
      )## end switch
    }## end else
  })## end lapply
  setOfAnnotations <- lapply(X = setOfAnnotationSets, FUN = function(x){
    if(any(x == "Ignore"))
      ## at least one annotation is ignore --> take ignore
      annotation <- "Ignore"
    else{
      switch(as.character(length(x)),
             "0"={## no annotation
               annotation <- "Unknown"
             },
             "1"={## one annotation
               annotation <- x
             },
             {## multiple annotations --> take the one which is primary
               annotation <- x[[1]]
             }
      )## end switch
    }## end else
  })## end lapply
  
  resultList <- list(
    setOfColors      = unlist(setOfColors),
    setOfAnnotations = unlist(setOfAnnotations)
  )
  
  #return(unlist(setOfColors))
  return(resultList)
}

#########################################################################################
## data fetching
getMetFragLink <- function(dataList, precursorIndex, outAdductWarning = TRUE){
  features <- dataList$featureIndeces[[precursorIndex]]
  fragmentsX <- dataList$fragmentMasses[features]
  fragmentsY <- as.numeric(dataList$featureMatrix[precursorIndex, features])
  fragmentsY[fragmentsY > 1] <- 1
  
  precursorMass  <- as.numeric(dataList$dataFrameInfos$"m/z"[[precursorIndex]])
  adduct <- "Unknown"
  if("Adduct ion name" %in% colnames(dataList$dataFrameInfos))
    adduct <- dataList$dataFrameInfos$"Adduct ion name"[[precursorIndex]]
  if("Adduct.ion.name" %in% colnames(dataList$dataFrameInfos))
    adduct <- dataList$dataFrameInfos$"Adduct.ion.name"[[precursorIndex]]
  neutralMassCorrection <- NA
  ionMode <- NA
  #generateLink <- TRUE
  
  error <- NULL
  
  switch(adduct,
         "[M-H]-"={
           neutralMassCorrection <- 1.008
           ionMode <- -1
         },
         "[M+H]+"={
           neutralMassCorrection <- -1.008
           ionMode <- 1
         },
         "Unknown"={
           #generateLink <- FALSE
           neutralMassCorrection <- NA
           ionMode <- NA
           error <- paste("This MS\u00B9 feature cannot be send to MetFrag, because the adduct is unknown.")
         },{
           #stop(paste("Unknown adduct (", adduct, ")!", sep = ""))
           if(outAdductWarning) print(paste("###### Unknown adduct (", adduct, ")!", sep = ""))
           #generateLink <- FALSE
           neutralMassCorrection <- NA
           ionMode <- NA
           error <- paste("This MS\u00B9 feature cannot be send to MetFrag, because the adduct '", adduct, "' is not supported.")
         }
  )
  neutralMass <- precursorMass + neutralMassCorrection
  
  fragmentsPositive <- fragmentsX > 0
  fragmentsPositiveX <- fragmentsX[fragmentsPositive]
  fragmentsPositiveY <- fragmentsY[fragmentsPositive]
  fragmentStrings <- paste(fragmentsPositiveX, fragmentsPositiveY, sep = " ", collapse = "; ")
  
  metFragOld <- FALSE
  if(metFragOld){
    # http://msbi.ipb-halle.de/MetFragBeta/LandingPage.jspx?limit=1000&ionmode=-1&database=pubchem&mzppm=7&mzabs=0.005&mass=448.468&formula=C16H20N2O9S2&mzabs=0.05&peaks=130.0655 288214.8119 ; 207.0589 422771.0127 ; 208.0622  87002.3217 ; 210.1334   2674.1707 ; 351.1016  27580.9393 ; 369.1115 739357.5045 ; 370.1148 143864.9611 ; 385.1094   5971.8328 ; 391.0937 337133.4536 ; 392.1025  40126.6888 ; 407.0678   3095.0322 ; 449.0690  37952.2515 
    landingPageUrl <- paste(sep = "",
                            "http://msbi.ipb-halle.de/MetFrag/LandingPage.jspx?",
                            "mass=", neutralMass, "&",
                            "formula=", "", "&",
                            "ionmode=", ionMode, "&",
                            #"limit=", "1000", "&",
                            "database=", "pubchem", "&",
                            #"mzppm=", "7", "&"
                            #"mzabs=", "0.005", "&",
                            "peaks=", fragmentStrings
    )
  } else {
    
    fragmentStrings <- paste(fragmentsPositiveX, fragmentsPositiveY, sep = "_", collapse = ";")
    
    ## https://msbi.ipb-halle.de/MetFragBeta/landing.xhtml?FragmentPeakMatchAbsoluteMassDeviation=0.01&FragmentPeakMatchRelativeMassDeviation=10&DatabaseSearchRelativeMassDeviation=10&PeakList=110_100;210_100&IonizedPrecursorMass=200.101&MetFragDatabaseType=PubChem
    #FragmentPeakMatchAbsoluteMassDeviation
    #FragmentPeakMatchRelativeMassDeviation
    #DatabaseSearchRelativeMassDeviation
    #PrecursorCompoundIDs
    #IonizedPrecursorMass
    #NeutralPrecursorMolecularFormula
    #PrecursorIonMode
    #IonizedPrecursorMass
    landingPageUrl <- paste(sep = "",
                            "https://msbi.ipb-halle.de/MetFragBeta/landing.xhtml", "?",
                            #"https://msbi.ipb-halle.de/MetFrag/landing.xhtml", "?",
                            "NeutralPrecursorMass", "=", neutralMass, "&",
                            "PrecursorIonMode", "=", ionMode, "&",
                            "MetFragDatabaseType", "=", "PubChem", "&",
                            "PeakList", "=", fragmentStrings
    )
  }
  
  
  #writeClipboard(landingPageUrl, format = 1)
  returObj <- list(
    error = error,
    landingPageUrl = landingPageUrl,
    precursorMass = precursorMass,
    neutralMass = neutralMass,
    adduct = adduct,
    fragmentMass = fragmentsPositiveX,
    fragmentIntensities = fragmentsPositiveY
  )
  
  return(returObj)
}
getMS2spectrum <- function(dataList, clusterDataList, treeLabel){
  if(treeLabel < 0){
    ###############################################
    ## leaf
    #return(getMS2spectrumInfoForPrecursorLeaf(dataList, clusterDataList, treeLabel))
    return(clusterDataList$ms2spectrumInfoForLeaves[[-treeLabel]])
  } else {
    ###############################################
    ## inner node
    #return(getMS2spectrumInfoForCluster(dataList, clusterDataList, treeLabel))
    return(clusterDataList$ms2spectrumInfoForClusters[[treeLabel]])
  }
}
getMS2spectrumInfoForPrecursorLeaf <- function(dataList, clusterDataList, treeLabel, outAdductWarning = TRUE){
  if(treeLabel >= 0)
    return(NULL)
  ###############################################
  ## leaf
  precursorIndex <- clusterDataList$filterObj$filter[[-treeLabel]]
  return(getMS2spectrumInfoForPrecursor(dataList, precursorIndex, outAdductWarning))
}
getMS2spectrumInfoForPrecursor <- function(dataList, precursorIndex, outAdductWarning = TRUE){
  
  precursorSet <- precursorIndex
  numberOfPrecursors <- length(precursorSet)
  
  ## fragments
  features <- dataList$featureIndeces[[precursorIndex]]
  fragmentsX <- dataList$fragmentMasses[features]
  fragmentsY <- as.numeric(dataList$featureMatrix[precursorIndex, features])
  fragmentsY[fragmentsY > 1] <- 1
  fragmentsColor <- rep(x = "black", times = length(fragmentsY))
  
  ## fragment discriminativity
  fragmentDiscriminativity <- rep(x = 0, times = length(features))
  
  
  ## info and MetFrag link
  featureID <- trimws(gsub(x = dataList$precursorLabels[[precursorIndex]], pattern = " +", replacement = " "))
  #featureID <- trimws(gsub(x = clusterDataList$cluster$labels[[-treeLabel]], pattern = " +", replacement = " "))
  featureFamilies <- dataList$annoArrayOfLists[[precursorIndex]]
  featureFamilies <- ifelse(
    test = length(featureFamilies) == 0, 
    yes = "None", 
    no = paste(unlist(featureFamilies), collapse = ", ")
  )
  featureName <- dataList$dataFrameInfos[[precursorIndex, "Metabolite name"]]
  
  infoText <- paste(
    "The MS/MS spectrum of MS\u00B9 feature '", 
    featureID, "'", 
    " comprises ", length(fragmentsX), " fragments. Families: ", featureFamilies, ". Name: ", featureName,
    sep = ""
  )
  metFragLinkList <- getMetFragLink(dataList, precursorIndex, outAdductWarning = FALSE)
  
  
  ## order data
  order <- order(fragmentsX)
  fragmentsX <- fragmentsX[order]
  fragmentsY <- fragmentsY[order]
  fragmentsColor <- fragmentsColor[order]
  fragmentDiscriminativity <- fragmentDiscriminativity[order]
  
  ## box
  resultObj <- list()
  resultObj$fragmentMasses <- fragmentsX
  resultObj$fragmentAbundances <- fragmentsY
  resultObj$fragmentColor <- fragmentsColor
  resultObj$fragmentDiscriminativity <- fragmentDiscriminativity
  resultObj$infoText <- infoText
  resultObj$infoFeatureLabel <- featureID
  resultObj$infoFragmentCount <- length(fragmentsX)
  resultObj$infoFamilies <- featureFamilies
  resultObj$infoName <- featureName
  resultObj$metFragLinkList <- metFragLinkList
  resultObj$precursorSet <- precursorSet
  resultObj$numberOfPrecursors <- numberOfPrecursors
  
  return(resultObj)
}
getMS2spectrumInfoForCluster <- function(dataList, clusterDataList, treeLabel){
  if(treeLabel < 0)
    return(NULL)
  ###############################################
  ## inner node
  clusterIndex <- treeLabel
  clusterMembersPrecursors <- sort(clusterDataList$innerNodeMembersPrecursors[[clusterIndex]])
  precursorSet <- clusterMembersPrecursors
  numberOfPrecursors <- length(precursorSet)
  numberOfClusterMembers <- length(clusterMembersPrecursors)
  
  ## fragments
  featuresIntersection <- clusterDataList$innerNodeFeaturesIntersection[[clusterIndex]]
  featuresUnion <- clusterDataList$innerNodeFeaturesUnion[[clusterIndex]]
  #fragmentsX <- dataList$fragmentMasses[featuresIntersection]
  #fragmentsY <- apply(X = data.numericmatrix(dataList$featureMatrix[clusterMembersPrecursors, featuresIntersection]), MARGIN = 2, FUN = mean)
  fragmentsX <- dataList$fragmentMasses[featuresUnion]
  fragmentsY <- apply(X = data.numericmatrix(dataList$featureMatrix[clusterMembersPrecursors, featuresUnion]), MARGIN = 2, FUN = mean)
  
  selectedPositive <- clusterDataList$innerNodeFeaturesCountsMatrix[clusterIndex, featuresUnion]
  coverageSelected <- selectedPositive / numberOfClusterMembers
  #fragmentsColor <- rep(x = "black", times = length(fragmentsY))
  fragmentsColor <- vector(length = length(fragmentsX))
  fragmentsColor[coverageSelected >= minimumProportionOfLeafs] <- "black"
  fragmentsColor[coverageSelected < minimumProportionOfLeafs] <- "grey"
  
  ## fragment discriminativity
  rootIndex <- length(clusterDataList$cluster$height)
  numberOfLeaves <- length(clusterDataList$innerNodeMembersPrecursors[[rootIndex]])
  numberOfNotClusterMembers <- numberOfLeaves - numberOfClusterMembers
  positives <- clusterDataList$innerNodeFeaturesCountsMatrix[rootIndex, featuresUnion]
  notSelectedPositive <- positives - selectedPositive
  
  #selectedNegative <- numberOfClusterMembers - featuresCountsMatrixSelected
  #notSelectedNegative <- numberOfNotClusterMembers - featuresCountsMatrixNotSelected
  #coverageNotSelected <- notSelectedPositive / numberOfNotClusterMembers
  #coverageAll <- positives / numberOfLeaves
  #relativePositives <- (selectedPositive - notSelectedPositive) / numberOfClusterMembers
  #fragmentDiscriminativity <- coverageSelected - coverageNotSelected
  #fragmentDiscriminativity <- coverageSelected * (1 - coverageNotSelected)
  #fragmentDiscriminativity <- ((selectedPositive - notSelectedPositive) / numberOfClusterMembers) * coverageSelected
  fragmentDiscriminativity <- (selectedPositive - notSelectedPositive) / numberOfClusterMembers
  fragmentDiscriminativity[fragmentDiscriminativity < 0] <- 0
  
  ## reduce to fragments above minimumProportionToShowFragment
  fragmentsX               <- fragmentsX              [coverageSelected > minimumProportionToShowFragment]
  fragmentsY               <- fragmentsY              [coverageSelected > minimumProportionToShowFragment]
  fragmentsColor           <- fragmentsColor          [coverageSelected > minimumProportionToShowFragment]
  fragmentDiscriminativity <- fragmentDiscriminativity[coverageSelected > minimumProportionToShowFragment]
  
  if(length(fragmentDiscriminativity) > 0)
    clusterDiscriminativity <- max(fragmentDiscriminativity)
  else
    clusterDiscriminativity <- 0
  
  ## annotations
  minimumProportionOfMembership <- 0.5
  featureFamilies_all <- unlist(unique(dataList$annoArrayOfLists[precursorSet])) ## all families
  proportionOfMembership <- sapply(X = featureFamilies_all, FUN = function(featureFamily){
    numberOfMembersHere <- sum(unlist(lapply(X = dataList$annoArrayOfLists[precursorSet], function(families){featureFamily %in% families})))
    return(numberOfMembersHere / length(precursorSet))
  })
  frequentFamilies <- featureFamilies_all[proportionOfMembership >= minimumProportionOfMembership]
  
  featureFamilies <- unlist(unique(dataList$annoArrayOfLists[precursorSet]))
  featureFamilies <- ifelse(
    test = length(featureFamilies) == 0, 
    yes = "None", 
    no = paste(unlist(featureFamilies), collapse = ", ")
  )
  
  ## info
  infoText <- paste(
    "This cluster has a cluster discriminativity of ", format(x = clusterDiscriminativity*100, digits = 3, nsmall = 2), "%",
    " and comprises ", length(clusterMembersPrecursors), " MS\u00B9 features",
    " which have ", length(fragmentsX), " fragment(s) in common.", 
    sep = ""
  )
  
  ## order data
  order <- order(fragmentsX)
  fragmentsX <- fragmentsX[order]
  fragmentsY <- fragmentsY[order]
  fragmentsColor <- fragmentsColor[order]
  fragmentDiscriminativity <- fragmentDiscriminativity[order]
  
  ## box
  resultObj <- list()
  resultObj$fragmentMasses <- fragmentsX
  resultObj$fragmentAbundances <- fragmentsY
  resultObj$fragmentColor <- fragmentsColor
  resultObj$fragmentDiscriminativity <- fragmentDiscriminativity
  resultObj$clusterDiscriminativity <- clusterDiscriminativity
  resultObj$frequentFamilies <- frequentFamilies
  resultObj$infoText <- infoText
  resultObj$metFragLinkList <- NULL
  resultObj$precursorSet <- precursorSet
  resultObj$numberOfPrecursors <- numberOfPrecursors
  
  return(resultObj)
}
## XXX adapt getTableFromTreeSelection
getTableFromPrecursorSet <- function(dataList, precursorSet){
  ###############################################
  ## table data
  numberOfPrecursors <- length(precursorSet)
  
  ## measurements
  columnNames <- unlist(lapply(X = dataList$grouXXXps, FUN = dataList$dataMeanColumnNameFunctionFromName))
  dataFrameMeasurements     <- data.frame(dataList$dataFrameMeasurements[precursorSet, columnNames, drop=FALSE])
  colnames(dataFrameMeasurements) <- columnNames
  rownames(dataFrameMeasurements) <- dataList$precursorLabels[precursorSet]
  dataFrameMeasurements <- format(x = dataFrameMeasurements, nsmall = 4)
  
  ## MS2 fragments
  props <- apply(
    X = dataList$featureMatrix[precursorSet, , drop = FALSE], 
    MARGIN = 2, 
    FUN = function(x){ sum(x != 0) / numberOfPrecursors }
  )
  featureIndeces <- which(props > minimumProportionToShowFragment)
  featureMatrix <- data.frame(as.matrix(dataList$featureMatrix[precursorSet, featureIndeces, drop = FALSE]))
  rownames(featureMatrix) <- dataList$precursorLabels[precursorSet]
  colnames(featureMatrix) <- colnames(dataList$featureMatrix)[featureIndeces]
  featureMatrix <- format(x = featureMatrix, digits = 1, nsmall = 4)
  if(length(featureIndeces) > 0){
    featureMatrix[featureMatrix=="0.0000"] <- "-"
    featureMatrix[featureMatrix=="1.0000"] <- "1"
  }
  
  ## annotations
  setOfAnnotationSets <- precursorSetToSetOfAnnotationSets(dataList, precursorSet)
  setOfAnnotations <- unlist(lapply(X = setOfAnnotationSets, FUN = function(x){
    paste(x, collapse = ", ")
  }))
  annotationDataFrame <- data.frame("Annotation" = setOfAnnotations, row.names = rownames(dataFrameMeasurements))
  
  ## box
  precursorLabels <- rownames(dataFrameMeasurements)
  ms1abundanceDataFrame <- dataFrameMeasurements
  ms2fragmentDataFrame <- featureMatrix
  
  resultObj <- list(
    precursorSet = precursorSet,
    featureIndeces = featureIndeces,
    ms1abundanceDataFrame = ms1abundanceDataFrame,
    ms2fragmentDataFrame = ms2fragmentDataFrame,
    annotationDataFrame = annotationDataFrame
  )
  
  return(resultObj)
}
getPrecursorSetFromTreeSelections <- function(clusterDataList, clusterLabels){
  precursorSet <- NULL
  for(clusterLabel in clusterLabels)
    precursorSet <- c(precursorSet, getPrecursorSetFromTreeSelection(clusterDataList, clusterLabel))
  return(precursorSet)
}
getPrecursorSetFromTreeSelection <- function(clusterDataList, clusterLabel){
  if(clusterLabel < 0){
    ###############################################
    ## leaf
    precursorIndex <- clusterDataList$filterObj$filter[[-clusterLabel]]
    precursorSet <- precursorIndex
  } else {
    ###############################################
    ## inner node
    clusterIndex <- clusterLabel
    precursorSet <- sort(clusterDataList$innerNodeMembersPrecursors[[clusterIndex]])
  }
  return(precursorSet)
}
getSpectrumStatistics <- function(dataList, precursorSet){
  if(FALSE){
    dataList_ <<- dataList
    precursorSet_ <<- precursorSet
  }
  if(FALSE){
    dataList <- dataList_
    precursorSet <- precursorSet_
  }
  
  fragmentCounts <- Matrix::colSums(x = dataList$featureMatrix[precursorSet, , drop=FALSE] != 0)
  theseFragments <- fragmentCounts > 0
  fragmentCounts <- fragmentCounts[theseFragments]
  fragmentMasses <- dataList$ms2_masses[theseFragments]
  return(list(
    fragmentMasses = fragmentMasses,
    fragmentCounts = fragmentCounts
  ))
}
getMS2plotData <- function(matrixRows, matrixCols, matrixVals, fragmentMasses){
  numberOfFragments <- length(fragmentMasses)
  meanIntensity <- vector(mode = "numeric", length = numberOfFragments)
  fragmentCount <- vector(mode = "numeric", length = numberOfFragments)
  for(colIdx in seq_len(numberOfFragments)){
    intensities <- matrixVals[matrixCols == colIdx]
    fragmentCount[[colIdx]] <- length(intensities)
    meanIntensity[[colIdx]] <- mean(x = intensities)
  }
  
  presentFragments <- fragmentCount > 0
  
  resultObj <- list()
  resultObj$numberOfFragments <- fragmentCount
  resultObj$averageAbundance  <- meanIntensity
  resultObj$masses            <- fragmentMasses
  
  return(resultObj)
}
regExExtraction = function(pattern, x, ...) {
  args = list(...)
  args[['perl']] = T
  
  re = do.call(gregexpr, c(list(pattern, x), args))
  
  mapply(function(re, x){
    
    cap = sapply(attr(re, 'capture.names'), function(n, re, x){
      start = attr(re, 'capture.start')[, n]
      len   = attr(re, 'capture.length')[, n]
      end   = start + len - 1
      tok   = substr(rep(x, length(start)), start, end)
      
      return(tok)
    }, re, x, simplify=F, USE.NAMES=T)
    
    return(cap)
  }, re, x, SIMPLIFY=F)
}
numericVectorToStringForEval <- function(vec){
  return(paste("c(", paste(vec, collapse = ","), ")", sep = ""))
}
colorVectorToStringForEval <- function(vec){
  return(paste("c('", paste(vec, collapse = "','"), "')", sep = ""))
}
ipb-halle/MetFamily documentation built on Sept. 5, 2024, 12:01 a.m.