R/peaks_to_function.R

Defines functions doHeatmapMummichogTest PlotlyPeaks2Paths votep sumz sump sumlog GetMummichogPathSetDetails GetCompoundDetails CreateMummichogLibs SetOrganism Prepare4TarIntegNetwork Prepare4IntegNetwork PrepareIntegCMPDList CreateListHeatmapJson PreparePeakTable4PSEA CreateHeatmapJson .rt.included SetRTincluded fgsea2 myFastList Convert2Dictionary convert2JsonList count_cpd2mz make_ecpdlist make_cpdlist mz_tolerance GetOrgMummichogVal GetOrgMummichogLbl GetTopInx GetMummiDataType GetMummiMode GetDefaultRTTol GetDefaultPvalCutoff GetECMsg GetAdductMsg GetCurrencyMsg GetMummiResColNames GetMummiResRowNames GetMummiResMatrix GetMummichogHTMLPathSet GetMatchingDetails PlotPSEAIntegPaths PlotPeaks2Paths UpdateEC_Rules new_adduct_mzlist PerformAdductMapping PerformCurrencyMapping .compute.mummichog.RT.fgsea .compute.mummichog.fgsea .save.mummichog.restable .compute.mummichogRTSigPvals .compute.mummichogSigPvals ComputeMummichogRTPermPvals .perform.mummichogRTPermutations ComputeMummichogPermPvals .perform.mummichogPermutations CalculateEffectSize .search.compoundLibMeta .search.compoundLib .setup.psea.library .init.RT.Permutations .init.Permutations PerformPSEA SetMetaPeaksPvals SetMummichogPval SetMummichogPvalFromPercent SanityCheckMummichogData UpdateInstrumentParameters Convert2Mummichog GetPeakFormat SetPeakFormat .format_mzmine_pktable Read.PeakListData SetPeakEnrichMethod Setup.AdductData

Documented in Convert2Mummichog CreateMummichogLibs GetCompoundDetails GetMummichogPathSetDetails GetTopInx make_cpdlist make_ecpdlist PerformAdductMapping PerformCurrencyMapping PerformPSEA PlotPeaks2Paths PlotPSEAIntegPaths PreparePeakTable4PSEA Read.PeakListData SanityCheckMummichogData SetMummichogPval SetMummichogPvalFromPercent SetOrganism SetPeakEnrichMethod SetPeakFormat SetRTincluded Setup.AdductData UpdateEC_Rules UpdateInstrumentParameters

### An R package for pathway enrichment analysis for untargeted metabolomics
### based on high-resolution LC-MS platform
### This is based on the mummichog algorithm implemented in python (http://mummichog.org/)
### The goals of developing MummichogR are
### 1) to make this available to the R user community
### 2) high-performance (similar or faster compared to python)
### 3) broader pathways support - by adding support for 21 common organisms based on KEGG pathways
### 4) companion web interface on MetaboAnalyst - the "MS Peaks to Pathways" module
### @authors J. Chong \email{jasmine.chong@mail.mcgill.ca}, J. Xia \email{jeff.xia@mcgill.ca}
### McGill University, Canada
### License: GNU GPL (>= 2)

#'Save adduct names for mapping
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects)
#'@param qvec Input the vector to query
#'@author Jeff Xia\email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'
Setup.AdductData <- function(mSetObj=NA, qvec){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$dataSet$adduct.list <- qvec;
  mSetObj$dataSet$adduct.custom <- TRUE
  return(.set.mSet(mSetObj));
}

#'Set the peak enrichment method for the MS Peaks to Paths module
#'@description This function sets the peak enrichment method.
#'@param mSetObj Input the name of the created mSetObj.
#'@param algOpt algorithm option, can be "gsea", "mum" and "integ"
#'@param version version of mummichog
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
SetPeakEnrichMethod <- function(mSetObj=NA, algOpt, version="v2"){
  
  mSetObj <- .get.mSet(mSetObj);
  #mSetObj$peaks.alg <- algOpt
  
  mSetObj$paramSet$version <- version
  mSetObj$input_cpdlist <- NULL;

  if(algOpt == "gsea"){
    #anal.type <<- "gsea_peaks";    
    mSetObj$paramSet$anal.type <- "gsea_peaks";
    mSetObj$mum_nm <- "mummichog_query_gsea.json";
    mSetObj$mum_nm_csv <- "mummichog_pathway_enrichment_gsea.csv";
  }else if(algOpt == "mum"){
    #anal.type <<- "mummichog"
    mSetObj$paramSet$anal.type <- "mummichog";
    #set json (for kegg network) and csv names
    mSetObj$mum_nm <- "mummichog_query_mummichog.json";
    mSetObj$mum_nm_csv <- "mummichog_pathway_enrichment_mummichog.csv";
  }else{
    #anal.type <<- "integ_peaks"
    mSetObj$paramSet$anal.type <- "integ_peaks";
    #set json (for kegg network) and csv names
    mSetObj$mum_nm <- "mummichog_query_integ.json";
    mSetObj$mum_nm_csv <- "mummichog_pathway_enrichment_integ.csv";
  }
  return(.set.mSet(mSetObj));
}

#'Constructor to read uploaded user files into the mummichog object
#'@description This function handles reading in CSV or TXT files and filling in the mSet object
#' for mummichog analysis. It makes sure that all necessary columns are present.
#'@usage Read.PeakListData(mSetObj=NA, filename = NA, meta.anal = FALSE, method = "pvalue")
#'@param mSetObj Input the name of the created mSetObj.
#'@param filename Input the path name for the CSV/TXT files to read.
#'@param meta.anal Logical, TRUE if data will be used for meta-analysis.
#'@param method Input the type of statistical scores included in the
#'mummichog input. "pvalue" for p-values, "es" for effect-sizes, and
#'"both" for both p-values and effect-sizes.
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@import qs
#'@export
Read.PeakListData <- function(mSetObj=NA, filename = NA, 
                              meta.anal = FALSE,
                              method = "pvalue") {
  
  mSetObj <- .get.mSet(mSetObj);
  
  file_name <- tools::file_path_sans_ext(basename(filename)) 
  mumDataContainsPval = 1; #whether initial data contains pval or not
  input <- as.data.frame(.readDataTable(filename));
  user_cols <- gsub("[^[:alnum:]]", "", colnames(input));
  mummi.cols <- c("m.z", "p.value", "t.score", "r.t")
  
  if(meta.anal & method %in% c("es", "both")){
    mummi.cols <- c(mummi.cols, "effect.size", "lower.ci", "upper.ci")
  }
  
  # first check if mode included
  mumDataContainsMode <- "mode" %in% user_cols;
  
  if(mumDataContainsMode){
    mode.info <- input$mode  
    input <- subset(input, select=-mode)
    user_cols <- gsub("[^[:alnum:]]", "", colnames(input))
  }
  
  # next check what column names are there
  hit <- "mz" %in% user_cols;
  
  if(sum(hit) < 1){
    AddErrMsg("Missing information, data must contain a 'm.z' column!");
    return(0);
  }
  
  if(length(colnames(input) %in% mummi.cols) == 1){
    peakFormat <- mSetObj$paramSet$peakFormat;
  }else{
    # subset to what's needed for ms peaks
    # then rename columns
    hits2 <- match(gsub("[^[:alnum:]]", "", mummi.cols), user_cols)
    input <- input[, na.omit(hits2)]  
    user_cols <- user_cols[na.omit(hits2)]
    hits.colnames <- match(user_cols, gsub("[^[:alnum:]]", "", mummi.cols))
    user.cols <- mummi.cols[na.omit(hits.colnames)]
    peakFormat <- paste0(substr(sort(user.cols), 1, 1), collapse = "")
    colnames(input) <- user.cols
  }
  
  rt.hit <- "r.t" %in% colnames(input)
  
  if(rt.hit > 0){
    rt = TRUE
  }else{
    rt = FALSE
  }
  
  qs::qsave(input, "mum_raw.qs");
  
  if(!"p.value" %in% colnames(input)){
    mumDataContainsPval <- 0;
    input[,'p.value'] <- rep(0, length=nrow(input))
  }
  
  if(!"t.score" %in% colnames(input)){
    input[,'t.score'] <- rep(0, length=nrow(input))
  }
  
  if(rt){
    mSetObj$dataSet$mummi.orig <- cbind(input$p.value, input$m.z, input$t.score, input$r.t);
    colnames(mSetObj$dataSet$mummi.orig) = c("p.value", "m.z", "t.score", "r.t")
  }else{
    mSetObj$dataSet$mummi.orig <- cbind(input$p.value, input$m.z, input$t.score);
    colnames(mSetObj$dataSet$mummi.orig) = c("p.value", "m.z", "t.score")
  }
  
  if(meta.anal & method %in% c("es", "both")){
    mSetObj$dataSet$mummi.orig <- cbind(mSetObj$dataSet$mummi.orig, effect.size=input$effect.size, 
                                        lower.ci=input$lower.ci, upper.ci=input$upper.ci);
  }
  
  if(mSetObj$dataSet$mode == "positive"){
    mSetObj$dataSet$pos_inx <- rep(TRUE, nrow(mSetObj$dataSet$mummi.orig))
  }else if(mSetObj$dataSet$mode == "negative"){
    mSetObj$dataSet$pos_inx <- rep(FALSE, nrow(mSetObj$dataSet$mummi.orig) )
  }else{ # mixed
    mSetObj$dataSet$pos_inx <- mode.info == "positive"
  }
  
  mSetObj$paramSet$mumRT = rt;
  mSetObj$dataSet$mum.type = "list";
  mSetObj$msgSet$read.msg <- paste("A total of", length(input$p.value), 
                                   "m/z features were found in your uploaded data.");
  mSetObj$dataSet$fileName <- file_name;
  mSetObj$paramSet$mumDataContainsPval <- mumDataContainsPval;
  mSetObj$paramSet$peakFormat <- peakFormat;
  mSetObj$dataSet$meta.info <- as.matrix(1); # Define a value to avoid bug

  return(.set.mSet(mSetObj));
}

# function to format peak table from mzmine to right format for metaboanalyst
# @format "row" for features in rows, "col" for features in columns.
.format_mzmine_pktable <- function(mSetObj=NA, fileName, format="rowu", group1=NA, group2=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  mzmine_table <- .readDataTable(fileName)
  
  if(class(mzmine_table) == "try-error" || ncol(mzmine_table) == 1){
    AddErrMsg("Data format error. Failed to read in the data!");
    AddErrMsg("Make sure the data table is saved as comma separated values (.csv) format!");
    AddErrMsg("Please also check the followings: ");
    AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.");
    AddErrMsg("We recommend to use a combination of English letters, underscore, and numbers for naming purpose.");
    AddErrMsg("Make sure sample names and feature (peak, compound) names are unique.");
    AddErrMsg("Missing values should be blank or NA without quote.");
    AddErrMsg("Make sure the file delimeters are commas.");
    return(0);
  }
  
  rownames(mzmine_table) <- mzmine_table[,1]
  mzmine_table <- mzmine_table[,-1]
  
  if(!is.na(group1) & !is.na(group2)){
    if(format == "row"){
      keep.inx <- mzmine_table[1, ] %in% c(group1, group2)
      mzmine_table <- mzmine_table[, keep.inx]
    }else{
      keep.inx <- mzmine_table[, 1] %in% c(group1, group2)
      mzmine_table <- mzmine_table[keep.inx, ]
    }
    newnames <- paste0(tools::file_path_sans_ext(basename(fileName)), "_", group1, "_", group2, ".csv")
  }else{
    
    mzmine_table[1, ] <- tolower(mzmine_table[1, ])
    groups <- unique(unlist(mzmine_table[1, ]))
    
    if(length(groups) > 2){
      AddErrMsg("Only two groups permitted!");
      if("qc" %in% groups){
        AddErrMsg("Remove QCs prior to uploading the mzmine table!");
      }
      return(0);
    }
    
    if(.on.public.web){
      newnames <- "mzmine_peaktable_metaboanalyst.csv"
    }else{
      newnames <- paste0(tools::file_path_sans_ext(basename(fileName)), "_formatted.csv")
    }
  }
  
  if(format == "colu"){ # samples in columns so features are in row
    feats <- gsub("/", "__", rownames(mzmine_table) )
    feats <- sub(".*?__", "", feats )
    feats <- sub("min", "", feats )
    feats <- sub("mz", "", feats )
    feats <- make.unique(feats, sep="")
    rownames(mzmine_table) <- feats
  }else{ # features in columns
    feats <- gsub("/", "__", colnames(mzmine_table) )
    feats <- sub(".*?__", "", feats )
    feats <- sub("min", "", feats )
    feats <- sub("mz", "", feats )
    feats <- make.unique(feats, sep="")
    colnames(mzmine_table) <- feats
  }
  fast.write.csv(mzmine_table, newnames, row.names = TRUE)
  return(.set.mSet(mSetObj))
}

#'Set the peak format for the mummichog analysis
#'@description Set the peak format for mummichog analysis.
#'@param mSetObj mSetObj
#'@param type Input the name of mummichog analysis type, usually 'mpt'.
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
SetPeakFormat <-function(mSetObj=NA, type = "mpt"){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$paramSet$peakFormat <- type;
  return(.set.mSet(mSetObj))
}

GetPeakFormat <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  return(mSetObj$paramSet$peakFormat)
}

#'Convert mSetObj to proper format for MS Peaks to Pathways module
#'@description Following t-test analysis or effect size calculation, 
#'this functions converts the results from the mSetObj 
#'to the proper format for mummichog analysis. 
#'@param mSetObj Input the name of the created mSetObj.
#'@param rt Logical, whether or not to include retention time information.
#'@param rds.file Logical, if true, the "annotated_peaklist.rds"
#'must be in the current working directory to get corresponding retention time
#'information for the features. If not, the retention time information
#'will be taken from the feature names. Feature names must be formatted
#'so that the mz and retention time for a single peak is separated by two
#'underscores. For instance, m/z of 410.2148 and retention time of 42.46914 seconds
#'must be formatted as 410.2148__42.46914.
#'@param rt.type Character, input whether retention time is in seconds (default as RT using
#'MetaboAnalystR is seconds) or minutes (as from MZmine).
#'@param test Character, input what statistical values to include in the mummichog input. 
#'For p-values and t-scores only from t-test, use "tt".
#'For log2FC from the fold-change analsis, use "fc".
#'For effect-sizes, use "es".
#'For, p-values, fold-changes and effect sizes, use "all".
#'For multiple groups, use 'aov'.
#'@param mode ion mode, positive or negative
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

Convert2Mummichog <- function(mSetObj=NA, 
                              rt=FALSE, 
                              rds.file=FALSE, 
                              rt.type="seconds", 
                              test="tt", mode=NA){

  if(test=="tt"|test=="all"){
    if(is.null(mSetObj$analSet$tt)){
      AddErrMsg("T-test was not performed!")
      return(0)
    }
    
    tt.pval <- sort(mSetObj$analSet$tt$p.value);
    fdr <- p.adjust(tt.pval, "fdr")
    #fdr <- tt.pval
    mz.pval <- names(tt.pval)
    pvals <- cbind(mz.pval, as.numeric(fdr))
    colnames(pvals) <- c("m.z", "p.value")
    
    tt.tsc <- sort(mSetObj$analSet$tt$t.score);
    mz.tsc <- names(tt.tsc)
    tscores <- cbind(mz.tsc, as.numeric(tt.tsc))
    colnames(tscores) <- c("m.z", "t.score")
  } else if(test=="es"|test=="all"){
    effect.size <- mSetObj$analSet$effect.size;    
    if(is.null(effect.size)){
      AddErrMsg("Effect size was not calculated!")
      return(0)
    }
    
    mz <- rownames(effect.size)
    esize <- cbind(mz, effect.size)
    colnames(esize) <- c("m.z", "effect.size", "st.dev", "lower.ci", "upper.ci");

  } else if(test=="fc"|test=="all"){
    log2fc <- mSetObj$analSet$fc$fc.log    
    if(is.null(log2fc)){
      AddErrMsg("Fold-change was not calculated!")
      return(0)
    }
    
    mz.fc <- names(log2fc)
    fcs <- cbind(mz.fc, as.numeric(log2fc))
    colnames(fcs) <- c("m.z", "log2.fc");

  } else if(test == "aov"){
    if(is.null(mSetObj$analSet$aov)){
      AddErrMsg("ANOVA was not performed!")
      return(0)
    }
    aov.pvals <- mSetObj$analSet$aov$sig.p;
    
    # Note pre-order will have effect on results
    # Let's be consistent
    ord.inx <- order(aov.pvals);
    fdr <- mSetObj$analSet$aov$sig.fdr[ord.inx];
    mz.pval <- names(fdr);
    pvals <- cbind(mz.pval, as.numeric(fdr));
    colnames(pvals) <- c("m.z", "p.value");

  } else {
      AddErrMsg("Unknown method!")
      return(0);
  }
  
  if(rt & rds.file){
    
    if(!file.exists("annotated_peaklist.rds")){
      AddErrMsg("annotated_peaklist.rds not found in current working directory!")
      return(0)
    }
    
    camera_output <- readRDS("annotated_peaklist.rds")
    mz.cam <- round(camera_output$mz, 5) 
    rt.cam <- round(camera_output$rt, 5) 
    camera <- cbind(mz.cam, rt.cam)
    colnames(camera) <- c("m.z", "r.t")
    
    if(test == "tt"){
      mummi_new <- Reduce(function(x,y) merge(x,y,by="m.z", all = TRUE), list(pvals, tscores, camera))
      complete.inx <- complete.cases(mummi_new[,c("p.value", "t.score", "r.t")]) # filter out m/zs without pval and tscore
    }else if(test == "es"){
      mummi_new <- Reduce(function(x,y) merge(x,y,by="m.z", all = TRUE), list(esize, camera))
      complete.inx <- complete.cases(mummi_new[,c("effect.size", "r.t")]) 
    }else if(test == "all"){
      mummi_new <- Reduce(function(x,y) merge(x,y,by="m.z", all = TRUE), list(pvals, tscores, fcs, esize, camera))
      complete.inx <- complete.cases(mummi_new[,c("p.value", "t.score", "log2.fc", "effect.size", "r.t")]) # filter out m/zs without pval and tscore
    }else if(test == "fc"){
      mummi_new <- Reduce(function(x,y) merge(x,y,by="m.z", all = TRUE), list(fcs, camera))
      complete.inx <- complete.cases(mummi_new[,c("log2.fc", "r.t")])
    }else if(test == "aov"){
      mummi_new <- Reduce(function(x,y) merge(x,y,by="m.z", all = TRUE), list(pvals, camera))
      complete.inx <- complete.cases(mummi_new[,c("p.value", "r.t")])
    }
    
    mummi_new <- mummi_new[complete.inx,]
    
  } else {
    
    if(test=="tt"){
      mummi_new <- merge(pvals, tscores);
    }else if(test=="es"){
      mummi_new <- esize;
    }else if(test=="all"){
      mummi_new <- Reduce(merge, list(pvals, tscores, fcs, esize));
      mummi_new[] <- lapply(mummi_new, as.character);
    }else if(test=="fc"){
      mummi_new <- fcs;
    }else if(test=="aov"){
      mummi_new <- pvals;
    }

    if(rt){ # taking retention time information from feature name itself
      feat_info <- mummi_new[,1]
      feat_info_split <- matrix(unlist(strsplit(feat_info, "__", fixed=TRUE)), ncol=2, byrow=T)
      colnames(feat_info_split) <- c("m.z", "r.t")
      
      if(rt.type == "minutes"){
        rtime <- as.numeric(feat_info_split[,2])
        rtime <- rtime * 60
        feat_info_split[,2] <- rtime
      }
      
      mummi_new <- cbind(feat_info_split, mummi_new[,-1])
    }
  }
  
  if(!is.na(mode)){
    if(mode=="positive"){
      mode <- rep("positive", nrow(mummi_new))
    }else{
      mode <- rep("negative", nrow(mummi_new))
    }
    mummi_new <- cbind(mummi_new, mode)
  }
  
  mummi_new[,1] <- as.numeric(make.unique(as.character(mummi_new[,1]), sep=""));  
  filename <- paste0("mummichog_input_", Sys.Date(), ".txt");
  input_filename <<- filename;
  write.table(mummi_new, filename, row.names = FALSE);
  
  return(.set.mSet(mSetObj))
}

#'Update the mSetObj with user-selected parameters for MS Peaks to Pathways.
#'@description This functions handles updating the mSet object for mummichog analysis. 
#'It is necessary to utilize this function
#'to specify to the organism's pathways to use (libOpt), the mass-spec mode (msModeOpt) 
#'and mass-spec instrument (instrumentOpt).
#'@usage UpdateInstrumentParameters(mSetObj=NA, instrumentOpt, 
#'msModeOpt, force_primary_ion, rt_frac, rt_tol)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects).
#'@param instrumentOpt Numeric. Define the mass-spec instrument used to perform untargeted metabolomics.
#'@param msModeOpt  Character. Define the mass-spec mode of the instrument used to perform untargeted metabolomics.
#'@param rt_frac rt_frac.
#'@param rt_tol rt_tol.
#'@param force_primary_ion Character, if "yes", only mz features that match compounds with a primary ion are kept.
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

UpdateInstrumentParameters <- function(mSetObj=NA, 
                                       instrumentOpt, 
                                       msModeOpt, 
                                       force_primary_ion = "yes", 
                                       rt_frac = 0.02, 
                                       rt_tol = NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  if(!is.numeric(instrumentOpt)){
    AddErrMsg("Mass accuracy must be numeric!")
  }else{
    mSetObj$dataSet$instrument <- instrumentOpt;
  }
  
  mSetObj$dataSet$mode <- msModeOpt;
  mSetObj$dataSet$primary_ion <- force_primary_ion;
  mSetObj$dataSet$rt_tol <- rt_tol;
  mSetObj$dataSet$rt_frac <- rt_frac;
  
  return(.set.mSet(mSetObj));
}

#'Sanity Check Data
#'@description SanityCheckData is used for data processing, and performs a basic sanity
#'check of the uploaded data, ensuring that the data is suitable for further analysis.
#'The function ensure that all parameters are properly set based on updated parameters.
#'@usage SanityCheckMummichogData(mSetObj=NA)
#'@param mSetObj Input the name of the created mSetObj (see InitDataObjects).
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@import qs
#'@export

SanityCheckMummichogData <- function(mSetObj=NA){
  
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$msgSet$check.msg <- NULL;
  if(mSetObj$dataSet$mum.type == "table"){
    mSetObj$paramSet$mumDataContainsPval <- 1;
    orig.data<- qs::qread("data_orig.qs");
    l = sapply(colnames(orig.data),function(x) return(unname(strsplit(x,"/", fixed=TRUE)[[1]][1])))
    colnames(orig.data) <- l;
    qs::qsave(orig.data, file="data_orig.qs");
    
    if(.on.public.web){
      return(SanityCheckData(NA));
    }else{
      mSetObj <- SanityCheckData(mSetObj)
      return(.set.mSet(mSetObj));
    }
  }
  
  msg.vec <- NULL;
  mSetObj$mum_nm <- "mummichog_query_mummichog.json"
  mSetObj$mum_nm_csv <- "mummichog_pathway_enrichment_mummichog.csv"
  ndat <- mSetObj$dataSet$mummi.orig;
  pos_inx = mSetObj$dataSet$pos_inx
  ndat <- data.frame(cbind(ndat, pos_inx), stringsAsFactors = FALSE)
  
  rawdat <- qs::qread("mum_raw.qs");
  
  if(mSetObj$paramSet$mumRT){
    na.num <- sum(is.na(ndat$r.t))
    # filter out any reads w. NA RT
    if(na.num>0){
      na.inx <- which(is.na(ndat$r.t))
      ndat <- ndat[-na.inx,]
      msg.vec <- c(msg.vec, paste("A total of <b>", na.num, "</b> mz features with missing retention times were removed."));
    }
  }
  
  # another check to ensure no missing or NA values
  missing.inx <- apply(ndat, 2, function(x) any(is.na(x)))
  
  if(any(missing.inx)){
    AddErrMsg("NA values found in the uploaded data!")
    return(0)
  }
  
  read.msg <- mSetObj$msgSet$read.msg
  
  # sort mzs by p-value
  ord.inx <- order(ndat[,1]);
  ndat <- ndat[ord.inx,]; # order by p-vals
  
  # filter based on mz
  mznew <- ndat[,2];
  
  # trim to fit within 50 - 2000
  my.inx <- mznew > 50 & mznew < 2001;
  trim.num <- sum(!my.inx);
  range = paste0(min(ndat[,1]), "-", max(ndat[,1]))
  if(length(unique(mSetObj[["dataSet"]][["pos_inx"]])) > 1){
    colNMadd <- "and mode";
    colnumadd <- 1;
  } else {
    colnumadd <- 0;
    colNMadd <- NULL;
  }
  
  msg.vec <- c(msg.vec, paste("The instrument's mass accuracy is <b>", mSetObj$dataSet$instrument , "</b> ppm."));
  msg.vec <- c(msg.vec, paste("The instrument's analytical mode is <b>", mSetObj$dataSet$mode , "</b>."));
  msg.vec <- c(msg.vec, paste("The uploaded data contains <b>", length(colnames(rawdat)) + colnumadd, "</b> columns."));
  
  peakFormat <- mSetObj$paramSet$peakFormat;
  
  if (peakFormat == "rmp") {
    msg.vec <- c(msg.vec, paste("The peaks are ranked by <b>p-values</b>."));
  } else if (peakFormat == "rmt") {
    msg.vec <- c(msg.vec, paste("The peaks are ranked by <b>t-scores</b>."));
  }
  
  msg.vec <- c(msg.vec, paste("The column headers of uploaded data are <b>", paste(colnames(rawdat),collapse=", "), colNMadd ,"</b>."));
  msg.vec <- c(msg.vec, paste("The range of m/z peaks is trimmed to 50-2000. <b>", trim.num, "</b> features have been trimmed."));
  
  if(trim.num > 0){
    ndat <- ndat[my.inx,]
    #msg.vec <- c(msg.vec, paste("A total of", trim.num, "were excluded to fit within mz range of 50-2000"));
  }
  
  # remove duplicated mzs (make unique)
  dup.inx <- duplicated(ndat);
  dup.num <- sum(dup.inx);
  
  if(dup.num > 0){
    ndat <- ndat[!dup.inx,];
    msg.vec <- c(msg.vec, paste("A total of <b>", dup.num, "</b> duplicated mz features were removed."));
  }
  
  # make mzs unique
  mzs <- ndat[,2]
  # ensure features are unique
  mzs_unq <- mzs[duplicated(mzs)]
  set.seed(123);
  while(length(mzs_unq)>0){
    mzs[duplicated(mzs)] <- vapply(mzs_unq, function(x) {paste0(x, sample(1:9, 1, replace = TRUE))}, FUN.VALUE = character(1L));
    mzs_unq <- mzs[duplicated(mzs)]
  }
  
  ndat[,2] <- mzs
  ref_mzlist <- ndat[,2];
  
  # set up expression (up/dn)
  tscores <- as.numeric(ndat[,3]);
  names(tscores) <- ref_mzlist;
  
  # set up rt
  if(mSetObj$paramSet$mumRT){
    
    retention_time <- as.numeric(ndat[,4]);
    names(retention_time) <- ref_mzlist;
    mSetObj$dataSet$pos_inx <- as.numeric(ndat$pos_inx) == 1;
    mSetObj$dataSet$ret_time <- retention_time;
    
    if(is.na(mSetObj$dataSet$rt_tol)){
      rt_tol <- max(mSetObj$dataSet$ret_time) * mSetObj$dataSet$rt_frac 
      #print(paste0("Retention time tolerance is ", rt_tol))
      mSetObj$dataSet$rt_tol <- rt_tol
    }
    
  }else{
    mSetObj$dataSet$pos_inx <- as.numeric(ndat$pos_inx) == 1;
  }
  
  ref.size <- length(ref_mzlist);
  
  msg.vec <- c(msg.vec, paste("A total of ", ref.size, "input mz features were retained for further analysis."));
  
  if(ref.size > 20000){
    msg.vec <- c(msg.vec, "There are too many input features, the performance may be too slow.");
  }
  
  if(ref.size < 100){
    AddErrMsg("There are too few m/z features. Ensure that all of your m/z features have been uploaded!");
    return(0)
  }
  
  if(min(ndat[,"p.value"])<0 || max(ndat[,"p.value"])>1){
    msg.vec <- c(msg.vec, "Please make sure the p-values are between 0 and 1.");
  }
  
  ref_cmpdlist <- mSetObj$dataSet$cmpd.orig;
  if(!is.null(mSetObj[["paramSet"]][["ms2id.type"]])){
    
    if(mSetObj$paramSet$ms2id.type == "hmdb_ids"){
      if(any(grepl("HMDB[0-9]+",as.character(ref_cmpdlist)))){
        msg.vec <- c(msg.vec, paste0("A total of ",
                                     length(which(grepl("HMDB[0-9]+",as.character(ref_cmpdlist)))),
                                     " HMDB Compounds included."));
        mSetObj$paramSet$ContainsMS2 <- TRUE;
      } else {
        mSetObj$paramSet$ContainsMS2 <- FALSE;
        AddErrMsg("No HMDB compounds included. Please check your data!");
        return(0)
      }
    } else if(mSetObj$paramSet$ms2id.type == "pubchem_cids"){
      if(any(grepl("^[0-9]+",as.character(ref_cmpdlist)))){
        msg.vec <- c(msg.vec, paste0("A total of ",
                                     length(which(grepl("^[0-9]+", as.character(ref_cmpdlist)))),
                                     " PubChem_CID Compounds included."));
        mSetObj$paramSet$ContainsMS2 <- TRUE;
      } else {
        mSetObj$paramSet$ContainsMS2 <- FALSE;
        AddErrMsg("No PubChem_CID compounds included. Please check your data!");
        return(0)
      }
    } else if(mSetObj$paramSet$ms2id.type == "pubchem_sids"){
      if(any(grepl("^[0-9]+", as.character(ref_cmpdlist)))){
        msg.vec <- c(msg.vec, paste0("A total of ",
                                     length(which(grepl("^[0-9]+", as.character(ref_cmpdlist)))),
                                     " PubChem_SID Compounds included."))
        mSetObj$paramSet$ContainsMS2 <- TRUE;
      } else {
        mSetObj$paramSet$ContainsMS2 <- FALSE;
        AddErrMsg("No PubChem_SID compounds included. Please check your data!");
        return(0)
      }
    } else if(mSetObj$paramSet$ms2id.type == "inchikeys"){
      if(any(grepl("^[A-Z]+-[A-Z]+-[A-Z]$", as.character(as.matrix(ref_cmpdlist))))){
        msg.vec <- c(msg.vec, paste0("A total of ",
                                     length(which(grepl("^[A-Z]+-[A-Z]+-[A-Z]$", as.character(as.matrix(ref_cmpdlist))))),
                                     " InchiKeys Compounds included."))
        mSetObj$paramSet$ContainsMS2 <- TRUE;
      } else {
        mSetObj$paramSet$ContainsMS2 <- FALSE;
        AddErrMsg("No InchiKeys compounds included. Please check your data!");
        return(0)
      }
    } else if(mSetObj$paramSet$ms2id.type == "smiles"){
      if(any(grepl("^[A-Z]+", as.character(as.matrix(ref_cmpdlist))))){
        msg.vec <- c(msg.vec, paste0("A total of ",
                                     length(which(grepl("^[A-Z]+", as.character(as.matrix(ref_cmpdlist))))),
                                     " SMILES Compounds included."))
        mSetObj$paramSet$ContainsMS2 <- TRUE;
      } else {
        mSetObj$paramSet$ContainsMS2 <- FALSE;
        AddErrMsg("No SMILES compounds included. Please check your data!");
        return(0)
      }
    } else {
      stop("IDtype must be one of 'hmdb_ids', 'pubchem_cids', 'pubchem_sids', 'inchikeys', 'smiles'.") 
    }
  }
  if(!is.null(ref_cmpdlist)){
    mSetObj$dataSet$ref_cmpdlist <- as.matrix(ref_cmpdlist);
  }
  
  mSetObj$msgSet$check.msg <- c(mSetObj$msgSet$check.msg, read.msg, msg.vec);
  mSetObj$dataSet$mummi.proc <- ndat;
  mSetObj$dataSet$expr_dic <- tscores;
  mSetObj$dataSet$ref_mzlist <- ref_mzlist;

  return(.set.mSet(mSetObj));
}

#'Set the cutoff for mummichog analysis
#'@description Set the p-value cutoff for mummichog analysis.
#'@param mSetObj Input the name of the created mSetObj.
#'@param fraction fraction value for mummichog running
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

SetMummichogPvalFromPercent <- function(mSetObj=NA, fraction){

  mSetObj <- .get.mSet(mSetObj);
  peakFormat <- mSetObj$paramSet$peakFormat;
  
  if(peakFormat %in% c("rmp", "rmt")){
    maxp <- 0;
  }else{
    pvals <- c(0.25, 0.2, 0.15, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001)
    ndat <- mSetObj$dataSet$mummi.proc;
    n <- floor(fraction*length(ndat[,"p.value"]))
    cutoff <- ndat[n+1,1]
    if(!any(pvals <= cutoff)){
      maxp <- 0.00001
    }else{
      maxp <- max(pvals[pvals <= cutoff])
    }
  }

  mSetObj$dataSet$cutoff <- maxp
  .set.mSet(mSetObj);
  return(SetMummichogPval(NA, maxp));
}

#'Set the cutoff for mummichog analysis
#'@description Set the p-value cutoff for mummichog analysis.
#'@param mSetObj Input the name of the created mSetObj.
#'@param cutoff cutoff value for mummichog running
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

SetMummichogPval <- function(mSetObj=NA, cutoff){
  
  mSetObj <- .get.mSet(mSetObj);

  msg.vec <- NULL;
  mSetObj$dataSet$cutoff <- cutoff;
  ndat <- mSetObj$dataSet$mummi.proc;
  msg.vec <- c(msg.vec, "Only a small percentage (below 10%) peaks in your input peaks should be significant.");
  msg.vec <- c(msg.vec, "The algorithm works best for <u>200~500 significant peaks in 3000~7000 total peaks</u>.");
  
  ref_mzlist <- ndat[,2];
  if(mSetObj$paramSet$mumDataContainsPval == 1){
    my.inx <- ndat[,1] <= cutoff;
    input_mzlist <- ref_mzlist[my.inx];
    # note, most of peaks are assumed to be not changed significantly, more than 25% should be warned
    
  }else{
    inxToKeep = length(ref_mzlist)/10
    if(inxToKeep > 500){
      inxToKeep = 500;
    }
    input_mzlist <- ref_mzlist[1:inxToKeep];
  }
  
  sig.size <- length(input_mzlist);
  sig.part <- round(100*sig.size/length(ref_mzlist),2);

  if(sig.size == 0){
    AddErrMsg("No significant features were found based on the current cutoff! Please adjust the p-value threshold.");
    return(0);
  }

  #  less than 1%
  if(sig.size < 11 || sig.part < 1){
    AddErrMsg(paste("The number of significant features is too small: ", sig.size, " (", sig.part, "%) for meaningful enrichment analysis. Please adjust the p-value threshold."));
    return(0);
  }
  
  if(sig.size > 2000){
    msg.vec <- c(msg.vec, "There are too many significant features based on the current cutoff, analysis will possibly be slow.");

  }else if(sig.part > 25){
    msg.vec <- c(msg.vec, paste("<font color=\"orange\">Warning: over", sig.part, "percent were significant based on your cutoff</font>."));
    msg.vec <- c(msg.vec, "You should adjust p-value cutoff to control the percentage");

  }else{
    msg.vec <- c(msg.vec, paste("A total of", sig.size, "or", sig.part, "percent significant mz features were found based on the selected p-value cutoff:", cutoff));
  }

  mSetObj$dataSet$input_mzlist <- input_mzlist;
  mSetObj$dataSet$N <- sig.size;
  
  if(.on.public.web){
    mSet <<- mSetObj;
    return(round(sig.part, 0))
  } else {
    return(.set.mSet(mSetObj));
  }
}

## For meta-analysis, set the p-value
## cutoff used to define the new
## input_mzlist
SetMetaPeaksPvals <- function(mSetObj=NA){
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$dataSet$cutoff <- cutoff;
  return(.set.mSet(mSetObj));
}

#'Function to perform peak set enrichment analysis
#'@description This is the main function that performs either the mummichog
#'algorithm, GSEA, or both for peak set enrichment analysis. 
#'@usage PerformPSEA(mSetObj=NA, lib, libVersion, minLib, permNum = 100)
#'@param mSetObj Input the name of the created mSetObj object. 
#'@param lib Input the name of the organism library, default is hsa_mfn. 
#'@param libVersion Input the version of the KEGG pathway libraries ("current" or "old").
#'@param minLib Numeric, input the minimum number of metabolites needed to consider the pathway 
#'or metabolite set. 
#'@param permNum Numeric, input the number of permutations to perform. Default is 100.
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import qs

PerformPSEA <- function(mSetObj=NA, lib, libVersion, minLib = 3, permNum = 100, init=T){
  
  mSetObj <- .get.mSet(mSetObj);
  mSetObj$initPSEA <- init;
  .set.mSet(mSetObj);
  mSetObj <- .setup.psea.library(mSetObj, lib, libVersion, minLib);
  version <- mSetObj$paramSet$version;
  mSetObj$dataSet$paramSet <- mSetObj$paramSet;
  if(mSetObj$paramSet$mumRT & version=="v2"){
    mSetObj <- .init.RT.Permutations(mSetObj, permNum)
  } else {
    mSetObj <- .init.Permutations(mSetObj, permNum)
  }

  if(class(mSetObj) != "list"){
    if(mSetObj == 0){
      AddErrMsg("MS Peaks to Paths analysis failed! Likely not enough m/z to compound hits for pathway analysis!")
      return(0)
    }
  }
  return(.set.mSet(mSetObj));
}

#' Internal function to perform PSEA, no retention time
#' @importFrom data.table setDF
#' @noRd
.init.Permutations <- function(mSetObj, permNum){
  anal.type0 <- mSetObj$paramSet$anal.type;

  if(anal.type0 == "mummichog"){
    mSetObj <- .perform.mummichogPermutations(mSetObj, permNum);
    mSetObj <- .compute.mummichogSigPvals(mSetObj);
  } else if(anal.type0 == "gsea_peaks") {
    mSetObj <- .compute.mummichog.fgsea(mSetObj, permNum);
  } else {
    # need to perform mummichog + gsea then combine p-values
    mSetObj <- .compute.mummichog.fgsea(mSetObj, permNum);
    mSetObj <- .perform.mummichogPermutations(mSetObj, permNum);
    mSetObj <- .compute.mummichogSigPvals(mSetObj);
    pathResults <- vector("list")
    
    pathResults$ora.all <- mSetObj$mummi.resmat
    pathResults$gsea.all <- mSetObj$mummi.gsea.resmat
    
    path.names.all <- lapply(pathResults, rownames)
    path.intersect <- Reduce(intersect, path.names.all)
    
    # remove paths not found by all three - complete.cases
    path.intersected <- lapply(pathResults, function(x) x[row.names(x) %in% path.intersect,])
    mum.df <- data.table::setDF(Reduce(merge, 
                                      lapply(path.intersected, 
                                             data.table::data.table, 
                                             keep.rownames = TRUE)))
    
    mum.df <- mum.df[, c("rn", "Pathway total", "Hits.total", "Hits.sig", "FET", "P_val")]
    rownames(mum.df) <- mum.df$rn;
    # combine p-values
    # replace 1 with 0.999 and 0 with low number
    mum.df[,5:6][mum.df[,5:6]==1] <- 0.999
    mum.df[,5:6][mum.df[,5:6]==0] <- .Machine$double.xmin
    
    combo.all <- apply(mum.df[,5:6], 1, function(x) sumlog(x))
    
    #extract p-values
    all.ps <- unlist(lapply(combo.all, function(z) z["p"]))
    df.combo <- as.matrix(mum.df[,2:6])
    dfcombo <- round(cbind(df.combo, all.ps), 5)
    colnames(dfcombo) <- c("Total_Size", "Hits", "Sig_Hits", "Mummichog_Pvals", "GSEA_Pvals", "Combined_Pvals")
    ord.inx <- order(dfcombo[,6]);
    dfcombo <- signif(as.matrix(dfcombo[ord.inx, ]), 4);
    
    fast.write.csv(dfcombo, "mummicho_pathway_enrichment_integ.csv", row.names = TRUE)
    if( is.null(mSetObj$initPSEA) || mSetObj$initPSEA){
        mSetObj$integ.resmat <- dfcombo;
        mSetObj$paramSet$integ.lib <- mSetObj$lib.organism;
    }

    matched_cpds <- names(mSetObj$cpd_exp)
    colnames(mum.df)[1] = "pathways";
    inx2<-na.omit(match(mum.df$pathways[ord.inx], mSetObj$pathways$name))
    filt_cpds <- lapply(inx2, function(f) { mSetObj$pathways$cpds[f] })
    
    cpds <- lapply(filt_cpds, function(x) intersect(unlist(x), matched_cpds))
    cpds_sig <- lapply(filt_cpds, function(x) intersect(unlist(x), mSetObj$input_cpdlist))
    
    mSetObj$path.nms <- rownames(dfcombo)
    mSetObj$path.hits <- convert2JsonList(cpds)
    mSetObj$path.pval <- as.numeric(dfcombo[,6])
    mSetObj <<- mSetObj;
    matched_res <- qs::qread("mum_res.qs");
    json.res <- list(
      cmpd.exp = mSetObj$cpd_exp,
      path.nms = rownames(dfcombo),
      hits.all = convert2JsonList(cpds),
      hits.all.size = as.numeric(dfcombo[,2]),
      hits.sig = convert2JsonList(cpds_sig),
      hits.sig.size = as.numeric(dfcombo[,3]),
      mum.p = as.numeric(dfcombo[,4]),
      gsea.p = as.numeric(dfcombo[,5]),
      comb.p = as.numeric(dfcombo[,6]),
      peakToMet = mSetObj$cpd_form_dict,
      peakTable = matched_res
    );
    
    json.mat <- RJSONIO::toJSON(json.res);
    sink(mSetObj$mum_nm);
    cat(json.mat);
    sink();

    if(is.null(mSetObj$imgSet$enrTables)){
        mSetObj$imgSet$enrTables <- list();
    }
    vis.type <- "mumEnr";
    resTable <- dfcombo[,c(2:6)];
    resTable <- cbind(Name=rownames(dfcombo),resTable);
    mSetObj$imgSet$enrTables[[vis.type]] <- list();
    mSetObj$imgSet$enrTables[[vis.type]]$table <- dfcombo;
    mSetObj$imgSet$enrTables[[vis.type]]$library <- mSetObj$lib.organism;
    mSetObj$imgSet$enrTables[[vis.type]]$algo <- "GSEA and Mummichog integrative Analysis";
    }
  return(mSetObj);
}

#' Internal function to perform PSEA, with RT
#' @noRd
.init.RT.Permutations <- function(mSetObj, permNum){
  
  anal.type0 <- mSetObj$paramSet$anal.type;
  
  if(anal.type0 == "mummichog"){
    mSetObj <- .perform.mummichogRTPermutations(mSetObj, permNum);
    mSetObj <- .compute.mummichogRTSigPvals(mSetObj);
  }else if(anal.type0 == "gsea_peaks"){
    mSetObj <- .compute.mummichog.RT.fgsea(mSetObj, permNum);
  }else{
    # need to perform mummichog + gsea then combine p-values
    mSetObj <- .compute.mummichog.RT.fgsea(mSetObj, permNum);
    mSetObj <- .perform.mummichogRTPermutations(mSetObj, permNum);
    mSetObj <- .compute.mummichogRTSigPvals(mSetObj);
    
    pathResults <- vector("list")
    
    pathResults$ora.all <- mSetObj$mummi.resmat
    pathResults$gsea.all <- mSetObj$mummi.gsea.resmat
    
    path.names.all <- lapply(pathResults, rownames)
    path.shared <- Reduce(intersect, path.names.all)
    
    # remove paths not found by all three - complete.cases
    path.intersected <- lapply(pathResults, function(x) x[row.names(x) %in% path.shared,])
    mum.df <- data.table::setDF(Reduce(merge, lapply(path.intersected, data.table::data.table, keep.rownames = TRUE)))
    
    mum.df <- mum.df[, c(1:4, 6, 14)]
    rownames(mum.df) <- mum.df$rn
    mum.df <- mum.df[,-1]
    colnames(mum.df) <- c("Total_Size", "Hits", "Sig_Hits", "Mummichog_Pvals", "GSEA_Pvals")
    
    # combine p-values
    combo.all <- apply(mum.df[,c("Mummichog_Pvals", "GSEA_Pvals")], 1, function(x) sumlog(x))
    
    #extract p-values
    all.ps <- unlist(lapply(combo.all, function(z) z["p"]))
    df.combo <- as.matrix(mum.df)
    df.combo <- round(cbind(df.combo, all.ps), 5)
    colnames(df.combo) <- c("Total_Size", "Hits", "Sig_Hits", "Mummichog_Pvals", "GSEA_Pvals", "Combined_Pvals")
    ord.inx <- order(df.combo[,6]);
    df.combo <- signif(as.matrix(df.combo[ord.inx, ]), 4);
    fast.write.csv(df.combo, "mummicho_pathway_enrichment_integ.csv", row.names = TRUE)
    mSetObj$integ.resmat <- df.combo
    
    ## transform ecpd to cpd for json files
    # for all cpds
    total_ecpds <- unique(mSetObj$total_matched_ecpds) #all matched compounds
    current.mset <- mSetObj$pathways$emp_cpds[match(rownames(df.combo), mSetObj$pathways$name)]
    ecpds <- lapply(current.mset, function(x) intersect(x, total_ecpds)); #pathways & all ref ecpds
    cpds <- lapply(ecpds, function(x) unique(unlist(mSetObj$ecpd_cpd_dict[match(x, names(mSetObj$ecpd_cpd_dict))])) )
    
    # for sig ecpds
    qset <- unique(unlist(mSetObj$input_ecpdlist));
    current.mset <- mSetObj$pathways$emp_cpds[match(rownames(df.combo), mSetObj$pathways$name)]
    feats <- lapply(current.mset, function(x) intersect(x, qset));
    cpds_feats <- lapply(feats, function(x) unique(unlist(mSetObj$ecpd_cpd_dict[match(x, names(mSetObj$ecpd_cpd_dict))])) )  
    
    # now make exp vec for all compounds
    cpds2ec <- mSetObj$cpd_ecpd_dict
    cpds.all <- unique(unlist(mSetObj$ecpd_cpd_dict[match(total_ecpds, names(mSetObj$ecpd_cpd_dict))]))
    cpds.exp <- sapply(cpds.all, function(x) sapply(seq_along(x), function(i) mean(mSetObj$ec_exp[match(unique(unlist(cpds2ec[match(x[[i]], names(cpds2ec))])), names(mSetObj$ec_exp))]) ) )
    
    mSetObj$path.nms <- rownames(df.combo)
    mSetObj$path.hits <- convert2JsonList(cpds)
    mSetObj$path.pval <- as.numeric(df.combo[,6])
    
    mSetObj <<- mSetObj;
    matched_res <- qs::qread("mum_res.qs");
    
    json.res <- list(
      cmpd.exp = cpds.exp,
      path.nms = rownames(df.combo),
      hits.all = convert2JsonList(cpds),
      hits.all.size = as.numeric(df.combo[,2]),
      hits.sig = convert2JsonList(cpds_feats),
      hits.sig.size = as.numeric(df.combo[,3]),
      mum.p = as.numeric(df.combo[,4]),
      gsea.p = as.numeric(df.combo[,5]),
      comb.p = as.numeric(df.combo[,6]),
      peakToMet = mSetObj$cpd_form_dict,
      peakTable = matched_res
    );
    
    json.mat <- RJSONIO::toJSON(json.res);
    sink(mSetObj$mum_nm);
    cat(json.mat);
    sink();
  }
  return(mSetObj);
}

# Internal function to set up library for PSEA
.setup.psea.library <- function(mSetObj = NA, 
                                lib, 
                                libVersion, 
                                minLib=3, 
                                metaAnalysis = FALSE,
                                metaLevel = "pathway", 
                                combine.level,
                                pval.method, 
                                es.method, 
                                rank.metric, 
                                mutual.feats = TRUE){
  
  version <- mSetObj$paramSet$version;
  filenm <- paste(lib, ".qs", sep="")
  biocyc <- grepl("biocyc", lib);

  if(!is.null(mSetObj$curr.cust)){
    
    if(biocyc){
      user.curr <- mSetObj$curr.map$BioCyc;
    }else{
      user.curr <- mSetObj$curr.map$KEGG;
    }

    if(.on.public.web){
        currency <<- user.curr;
    } else {
        currency_r <<- user.curr;
    }
    
    if(.on.public.web){
        currency_tmp <- currency;
    } else {
        currency_tmp <- currency_r;
    }

    if(length(currency_tmp)>0){
      mSetObj$mummi$anal.msg <- c("Currency metabolites were successfully uploaded!")
    }else{
      mSetObj$mummi$anal.msg <- c("Errors in currency metabolites uploading!")
    }
  }
 
  if(.on.public.web){
    mum.url <- paste("../../libs/mummichog/", filenm, sep="");
    print(paste("Adding mummichog library:", mum.url));
    mummichog.lib <- try(qs::qread(mum.url), silent = TRUE)
    if(class(mummichog.lib) == "try-error"){
        AddErrMsg("The database you have selected is not matched/found!")
        return(0)
    }

  }else{
    if(!file.exists(filenm)){
      mum.url <- paste("https://www.metaboanalyst.ca/resources/libs/mummichog/", filenm, sep="");
      download.file(mum.url, destfile = filenm, method="libcurl", mode = "wb")
      mummichog.lib <- qs::qread(filenm);
    }else{
      mummichog.lib <- qs::qread(filenm);
    }
  }
  
  
  ## confirming ms2 id type
  if(is.null(mSetObj$paramSet$ContainsMS2)){mSetObj$paramSet$ContainsMS2<-FALSE}
  if(mSetObj$paramSet$ContainsMS2){
    ms2id <- mSetObj$paramSet$ms2id.type;
    if(!(ms2id %in% c("hmdb_ids", "inchikeys", "pubchem_CIDs", "pubchem_SIDs", "SMILES"))){
      if(.on.public.web){
        AddErrMsg("The MS2 ID  you have selected is not correct!")
        return(0)
      }
      warning("MS2 ID type must be one of 'hmdb_ids', 'inchikeys', 'pubchem_CIDs', 'pubchem_SIDs', 'SMILES'.")
      stop("Please redefine MS2 ID with function 'SetMS2IDType'.")
    }
    ## checking adducts info - with MS2 INFO
    if(!is.null(mSetObj$dataSet$adduct.custom)){
      mw <- mummichog.lib$cpd.lib$mw;
      new_adducts <- new_adduct_mzlist(mSetObj, mw);
      
      cpd.lib <- list(
        mz.matp = new_adducts$pos,
        mz.matn = new_adducts$neg,
        mw = mummichog.lib$cpd.lib$mw,
        id = mummichog.lib$cpd.lib$id,
        name = mummichog.lib$cpd.lib$name,
        ms2IDs = mummichog.lib$cpd.lib[[ms2id]]
      );
      
    } else {
      
      cpd.lib <- list(
        mz.matp = mummichog.lib$cpd.lib$adducts[["positive"]],
        mz.matn = mummichog.lib$cpd.lib$adducts[["negative"]],
        mw = mummichog.lib$cpd.lib$mw,
        id = mummichog.lib$cpd.lib$id,
        name = mummichog.lib$cpd.lib$name,
        ms2IDs = mummichog.lib$cpd.lib[[ms2id]]
      );
    }
  } else {
    ## checking adducts info - nonMS2
    if(!is.null(mSetObj$dataSet$adduct.custom)){
      mw <- mummichog.lib$cpd.lib$mw;
      new_adducts <- new_adduct_mzlist(mSetObj, mw);
      
      cpd.lib <- list(
        mz.matp = new_adducts$pos,
        mz.matn = new_adducts$neg,
        mw = mummichog.lib$cpd.lib$mw,
        id = mummichog.lib$cpd.lib$id,
        name = mummichog.lib$cpd.lib$name
      );
      
    }else{
      
      cpd.lib <- list(
        mz.matp = mummichog.lib$cpd.lib$adducts[["positive"]],
        mz.matn = mummichog.lib$cpd.lib$adducts[["negative"]],
        mw = mummichog.lib$cpd.lib$mw,
        id = mummichog.lib$cpd.lib$id,
        name = mummichog.lib$cpd.lib$name
      );
    }
  }
  
  cpd.treep <- mummichog.lib$cpd.tree[["positive"]];
  cpd.treen <- mummichog.lib$cpd.tree[["negative"]];
  
  # filter pathways based on the length of pathway library
  # build empirical compound library after
  path.length <- sapply(mummichog.lib$pathways$cpds, length)
  
  min.inx <- which(path.length >= minLib)
  
  cleaned.pathways <- vector("list")
  cleaned.pathways$cpds <- mummichog.lib$pathways$cpds[min.inx]
  cleaned.pathways$id <- mummichog.lib$pathways$id[min.inx]
  cleaned.pathways$name <- mummichog.lib$pathways$name[min.inx]
  
  mSetObj$pathways <- cleaned.pathways;
  
  if(metaAnalysis & metaLevel %in% c("cpd", "ec")){
    mSetObj <- .search.compoundLibMeta(mSetObj, cpd.lib, cpd.treep, cpd.treen, metaLevel, combine.level,
                                       pval.method, es.method, rank.metric, mutual.feats);
  } else if (mSetObj$paramSet$ContainsMS2) {
    mSetObj <- .search_MS2compoundLib(mSetObj, cpd.lib, cpd.treep, cpd.treen);
  } else {
    mSetObj <- .search.compoundLib(mSetObj, cpd.lib, cpd.treep, cpd.treen);
  }

  if(mSetObj$paramSet$mumRT & version=="v2"){
    # only for empirical compounds
    if(metaLevel %in% c("ec", "pathway", "pooled")){
      # map cpds to empirical cpds
      cleaned.pathways$emp_cpds <- lapply(cleaned.pathways$cpds, 
                                          function(x) {
                                            unique(unlist(mSetObj$cpd_ecpd_dict[na.omit(match(x, names(mSetObj$cpd_ecpd_dict)))]))
                                          })

      # delete emp_cpds, cpds and names with no emp_cpds
      null.inx <- sapply(cleaned.pathways$emp_cpds, is.null)
     
      new_pathways <- vector("list");
      
      new_pathways$cpds <- cleaned.pathways$cpds[!null.inx]
      new_pathways$name <- cleaned.pathways$name[!null.inx]
      new_pathways$emp_cpds <- cleaned.pathways$emp_cpds[!null.inx]
      
      mSetObj$pathways <- new_pathways;
    }
  }
  
  mSetObj$lib.organism <- lib; #keep track of lib organism for sweave report
  return(mSetObj);
}

# version 2
# internal function for searching compound library
.search.compoundLib <- function(mSetObj, 
                                cpd.lib, 
                                cpd.treep, 
                                cpd.treen){

  ref_mzlist <- as.numeric(mSetObj$dataSet$ref_mzlist);
  print(paste0("compoundLib"));
  print(paste0("Got ", length(ref_mzlist), " mass features."))
  pos_inx <- mSetObj$dataSet$pos_inx;
  ref_mzlistp <- ref_mzlist[pos_inx];
  ref_mzlistn <- ref_mzlist[!pos_inx];
  version <- mSetObj$paramSet$version;
  
  # for empirical compounds
  if(mSetObj$paramSet$mumRT & version=="v2"){
    ord_rt <- rank(mSetObj$dataSet$ret_time, ties.method = "random")
    ret_time_pos <- mSetObj$dataSet$ret_time[pos_inx];
    ret_time_rank_pos <- ord_rt[pos_inx];
    ret_time_neg <- mSetObj$dataSet$ret_time[!pos_inx];
    ret_time_rank_neg <- ord_rt[!pos_inx];
    rt_tol <- mSetObj$dataSet$rt_tol;
    rt_tol_rank <- length(ref_mzlist)*mSetObj$dataSet$rt_frac;
  } else {
    # add fake RT
    ret_time_pos <- rep(1, length(ref_mzlistp))
    ret_time_rank_pos <- rep(1, length(ref_mzlistp))
    ret_time_neg <- rep(1, length(ref_mzlistn))
    ret_time_rank_neg <- rep(1, length(ref_mzlistn))
  }
  
  modified.statesp <- colnames(cpd.lib$mz.matp);
  modified.statesn <- colnames(cpd.lib$mz.matn);
  my.tolsp <- mz_tolerance(ref_mzlistp, mSetObj$dataSet$instrument);
  my.tolsn <- mz_tolerance(ref_mzlistn, mSetObj$dataSet$instrument);
  
  # get mz ladder (pos index)
  self.mzsp <- floor(ref_mzlistp);
  all.mzsp <- cbind(self.mzsp-1, self.mzsp, self.mzsp+1);
  
  self.mzsn <- floor(ref_mzlistn);
  all.mzsn <- cbind(self.mzsn-1, self.mzsn, self.mzsn+1);
  
  # matched_res will contain detailed result (cmpd.id. query.mass, mass.diff) for all mz;
  # use a high-performance variant of list
  matched_resp <- myFastList();
  matched_resn <- myFastList();
  
  if(mSetObj$dataSet$mode != "negative"){
    for(i in 1:length(ref_mzlistp)){
      mz <- ref_mzlistp[i];
      rt <- ret_time_pos[i];
      rt_rank <- ret_time_rank_pos[i];
      my.tol <- my.tolsp[i];
      all.mz <- all.mzsp[i,];
      pos.all <- as.numeric(unique(unlist(cpd.treep[all.mz])));
      
      for(pos in pos.all){
        id <- cpd.lib$id[pos];
        mw.all <- cpd.lib$mz.matp[pos,]; #get modified mzs
        diffs <- abs(mw.all - mz); #modified mzs - mz original
        hit.inx <- which(diffs < my.tol);
        if(length(hit.inx)>0){
          for(spot in 1:length(hit.inx)){
            hit.pos <- hit.inx[spot];# need to match all
            index <- paste(mz, id, rt, hit.pos, sep = "___");
            matched_resp$add(index, c(i, id, mz, rt, rt_rank, mw.all[hit.pos], modified.statesp[hit.pos], diffs[hit.pos])); #replaces previous when hit.inx>1
          }
        }
      }
    }
  }
  
  all.mzsn <<- all.mzsn
  
  if (mSetObj$dataSet$mode != "positive") {
    for(i in 1:length(ref_mzlistn)){
      mz <- ref_mzlistn[i];
      rt <- ret_time_neg[i];
      rt_rank <- ret_time_rank_neg[i];
      my.tol <- my.tolsn[i];
      all.mz <- all.mzsn[i,];
      pos.all <- as.numeric(unique(unlist(cpd.treen[all.mz])));
      
      for(pos in pos.all){
        id <- cpd.lib$id[pos]; # position of compound in cpd.tree
        mw.all <- cpd.lib$mz.matn[pos,]; #get modified mzs
        diffs <- abs(mw.all - mz); #modified mzs - mz original
        hit.inx <- which(diffs < my.tol);
        if(length(hit.inx)>0){
          for(spot in 1:length(hit.inx)){
            hit.pos <- hit.inx[spot];# need to match all
            index <- paste(mz, id, rt, hit.pos, sep = "___"); #name in fast_list
            matched_resn$add(index, c(i, id, mz, rt, rt_rank, mw.all[hit.pos], modified.statesn[hit.pos], diffs[hit.pos])); #replaces previous when hit.inx>1
          }
        }
      }
    }
  }
  
  # convert to regular list
  if (mSetObj$dataSet$mode == "mixed") {
    
    matched_resn <- matched_resn$as.list();
    matched_resp <- matched_resp$as.list();
    
    neg_matches <- length(matched_resn) > 0
    pos_matches <- length(matched_resp) > 0
    
    if(!neg_matches & !pos_matches){
      msg.vec <<- "No compound matches from upload peak list!"
      return(0)
    }
    
    if(neg_matches){
      matched_resn <- data.frame(matrix(unlist(matched_resn), nrow=length(matched_resn), byrow=T), stringsAsFactors = FALSE);
    }
    
    if(pos_matches){
      matched_resp <- data.frame(matrix(unlist(matched_resp), nrow=length(matched_resp), byrow=T), stringsAsFactors = FALSE);
    }
    
    if(neg_matches & pos_matches){ # both w. matches
      matched_res <- rbind(matched_resp, matched_resn)
    }else if(neg_matches & !pos_matches){ # only neg w. matches
      matched_res <- matched_resn
    }else{ # only pos w. matches
      matched_res <- matched_resp
    }
    
  } else if(mSetObj$dataSet$mode == "positive") {
    matched_resp <- matched_resp$as.list();
    
    if(is.null(unlist(matched_resp))){
      msg.vec <<- "No compound matches from upload peak list!"
      return(0)
    }
    
    matched_resp <- data.frame(matrix(unlist(matched_resp), nrow=length(matched_resp), byrow=T), stringsAsFactors = FALSE);
    matched_res <- matched_resp;
    
  } else {
    matched_resn <- matched_resn$as.list();
    if(is.null(unlist(matched_resn))){
      msg.vec <<- "No compound matches from upload peak list!"
      return(0)
    }
    
    matched_resn <- data.frame(matrix(unlist(matched_resn), nrow=length(matched_resn), byrow=T), stringsAsFactors = FALSE);
    matched_res <- matched_resn
  }
  
  # re-order columns for output
  matched_res <- matched_res[, c(3,2,7,8,4,5)];
  colnames(matched_res) <- c("Query.Mass", "Matched.Compound", "Matched.Form", "Mass.Diff", "Retention.Time", "RT.Rank");
  
  if(!mSetObj$paramSet$mumRT & version=="v2"){
    matched_res <- matched_res[,-(5:6)]
  }
  
  #print(paste0(length(unique(matched_res[,2])), " matched compounds! cpd2mz"))
  
  # now create empirical compounds if necessary!
  # 1 compound matches to multiple m/z, filter by RT 
  if(mSetObj$paramSet$mumRT & version=="v2"){
    start <- Sys.time()
    # mz, ion
    empirical.cpd.list <- split(matched_res[,c(1,3,5,6)], matched_res[,2]); # split mz, ion and rt by compound
    empirical.cpds2cpds <- vector(length=(length(empirical.cpd.list)), "list")
    names(empirical.cpds2cpds) <- names(empirical.cpd.list)
    
    # for each compound, if multiple matches, split into ECpds if > RT tolerance - rt_tol
    for(i in 1:length(empirical.cpd.list)){
      
      mzs <- empirical.cpd.list[[i]]$Query.Mass
      ions <- empirical.cpd.list[[i]]$Matched.Form
      rts <- empirical.cpd.list[[i]]$Retention.Time
      rt.rank <- empirical.cpd.list[[i]]$RT.Rank
      cpds <- names(empirical.cpd.list)[i]
      
      # first, for each compound, determine ECs among matched ions
      if(length(mzs)>1){ # if multiple ECs per compound
        
        # first group together to create empirical cpds by rt
        rts <- as.numeric(rts)
        names(rts) <- paste0(mzs, ";", ions, ";", rts, ";", cpds)
        rts <- sort(rts)
        
        # second, group together to create empirical cpds by rt rank
        rt.ranks <- as.numeric(rt.rank)
        names(rt.ranks) <- paste0(mzs, ";", ions, ";", rts, ";", cpds)
        rt.ranks <- sort(rt.ranks)
        
        split.inx <- c(0, cumsum(Reduce("&", list(abs(diff(rts)) > rt_tol, abs(diff(rt.ranks)) > rt_tol_rank) )))
        
        # need to deal w. multiple rts but only 1 EC
        if(length(unique(split.inx)) > 1){
          e.cpds <- split(rts, split.inx)
          empirical.cpds2cpds[[i]] <- lapply(e.cpds, names)
        }else{
          empirical.cpds2cpds[[i]] <- paste0(names(rts), collapse="__")
        }
        
      }else{ # if only 1 EC per compound
        empirical.cpds2cpds[[i]] <- paste0(mzs, ";", ions, ";", rts, ";", cpds)
      }
    }
    
    initial_ecs <- unlist(empirical.cpds2cpds, recursive=FALSE)
    names(initial_ecs) <- paste0("EC", 1:length(initial_ecs))
    print(paste0(length(initial_ecs), " initial ECs created!"))
    
    # second, merge ECs if same m/z and form - append compounds
    try <- melt(initial_ecs)
    try2 <- strsplit(as.character(try[,1]), split="__", fixed=TRUE) # deals with multiple rts belonging to 1 EC
    try2.df <- data.frame(value=unlist(try2), L1 = rep(try$L1, sapply(try2, length)))
    
    info <- strsplit(as.character(try2.df[,1]), split=";")
    df_ecs <- data.frame(ec=as.character(try2.df[,2]), mz=sapply(info, `[[`, 1), form=sapply(info, `[[`, 2), rt = sapply(info, `[[`, 3), cpd = sapply(info, `[[`, 4), stringsAsFactors = F)
    df_ecs$str_row_inx <- paste(df_ecs$mz, df_ecs$form, df_ecs$rt, sep = "___")
    qs::qsave(df_ecs, "initial_ecs.qs")
    merged_ecs <- aggregate(. ~ str_row_inx, df_ecs, paste, collapse=";")
    
    # cleaning the df
    # merged_ecs$ec <- sapply(strsplit(merged_ecs$ec, ";", fixed=TRUE), function(x) unlist(x)[1]) - keep as long name
    merged_ecs$mz <- sapply(strsplit(merged_ecs$mz, ";", fixed=TRUE), function(x) unique(unlist(x)))
    merged_ecs$form <- sapply(strsplit(merged_ecs$form, ";", fixed=TRUE), function(x) unique(unlist(x)))
    merged_ecs$rt <- sapply(strsplit(merged_ecs$rt, ";", fixed=TRUE), function(x) unique(unlist(x)))
    print(paste0(length(unique(merged_ecs$ec)), " merged ECs identified!"))
    
    # third, check if primary ion is present
    # needs to be per EC!
    if(mSetObj$dataSet$primary_ion=="yes"){
      
      ecs <- unique(merged_ecs$ec);
      
      # function to group ECs and verify if contains primary ion
      new_info <- lapply(ecs, function(x) { 
        new_info <- merged_ecs[which(merged_ecs$ec == x),] # subset merged_ecs to rows containing ECx
        primary.inx <- length(intersect(new_info$form, primary_ions))
        
        if(primary.inx>0){
          new_info <- new_info
        }else{
          new_info <- NULL
        }
        new_info
      })  
      
      final_ecs <- do.call(args=new_info, what=rbind)[,-1]
      
    }else{
      final_ecs <- merged_ecs[,-1]
    }
    
    colnames(final_ecs) <- c("Empirical.Compound", "Query.Mass", "Matched.Form", "Retention.Time", "Matched.Compound")
    
    # transform to long format
    cpd_split <- strsplit(as.character(final_ecs$Matched.Compound), ";", fixed=TRUE)
    reps <- pmax(lengths(cpd_split))
    df2 <- final_ecs[rep(1:nrow(final_ecs), reps), 1:4]
    df2$Matched.Compound <- unlist(mapply(function(x,y) c(x, rep(NA, y)), cpd_split, reps-lengths(cpd_split)))
    
    matched_res <- merge(matched_res, df2)
    matched_res <- matched_res[,-6] #rm rt rank
    matched_res[,6] <- as.character(matched_res[,6])
    
    # now deal with the fact that if at least one EC overlap, need to count as same EC per compound...
    my_final_cpds <- aggregate(. ~ Matched.Compound, matched_res, paste, collapse="___")
    my_final_cpds_list <- lapply(split(my_final_cpds$Empirical.Compound, my_final_cpds$Matched.Compound), unlist)
    
    cpd2ec1 <- lapply(seq_along(my_final_cpds_list), function(x) { # function used to make grouping of ecs per cpd
      
      ecs <- unlist(strsplit(my_final_cpds_list[[x]], "___", fixed=TRUE))
      
      if(length(ecs) > 1){
        ecs.list <- as.list(strsplit(ecs, ";", fixed=TRUE))
        library(igraph)
        m = sapply(ecs.list, function(x) sapply(ecs.list, function(y) length(intersect(x,y))>0))
        g = igraph::groups(components(graph_from_adjacency_matrix(m)))
        ecs <- paste0(sapply(g, function(z) paste0(ecs[z], collapse = "|") ), collapse = "___")
      }
      ecs
    })
    
    names(cpd2ec1) <- names(my_final_cpds_list)
    
    update_ecs <- lapply(seq_along(cpd2ec1), function(z) {
      
      ecs.old <- unlist(strsplit(my_final_cpds_list[[z]], "___", fixed=TRUE))
      ecs.new <- unlist(strsplit(cpd2ec1[[z]], "___", fixed=TRUE))
      
      for(i in seq_along(ecs.new)){
        pattern <- ecs.new[i]
        pattern_vec <- unlist(strsplit(pattern, "\\|"))
        up.pattern <- paste0(unique(pattern_vec), collapse = "|")
        ecs.old[ ecs.old %in% pattern_vec  ] <- up.pattern
      }
      
      ecs.old <- paste0(ecs.old, collapse = "___")
      ecs.old
    })
    
    updated_ecs <- do.call(rbind, update_ecs)
    my_final_cpds$Empirical.Compound <- updated_ecs
    
    new_dt <- data.table::data.table(my_final_cpds)
    new_dt <- new_dt[, list(Query.Mass = unlist(strsplit(as.character(Query.Mass), "___", fixed=TRUE)), 
                            Matched.Form = unlist(strsplit(as.character(Matched.Form), "___", fixed=TRUE)),
                            Retention.Time = unlist(strsplit(as.character(Retention.Time), "___", fixed=TRUE)),
                            Mass.Diff = unlist(strsplit(as.character(Mass.Diff), "___", fixed=TRUE)),
                            Empirical.Compound = unlist(strsplit(as.character(Empirical.Compound), "___", fixed=TRUE))),
                     by = Matched.Compound]
    
    matched_res <- data.frame(Query.Mass = new_dt$Query.Mass, Matched.Compound = new_dt$Matched.Compound, Matched.Form = new_dt$Matched.Form,
                              Retention.Time = new_dt$Retention.Time, Mass.Diff = new_dt$Mass.Diff, Empirical.Compound = new_dt$Empirical.Compound, stringsAsFactors = FALSE)
    
    # make EC names
    ec <- matched_res$Empirical.Compound
    ec.unique <- unique(matched_res$Empirical.Compound)
    
    for(i in seq_along(ec.unique)){
      ec <- replace(ec, grep(paste0("\\b", ec.unique[i], "\\b"), ec, perl=TRUE), paste0("EC000", i))
    }
    
    matched_res$Empirical.Compound <- gsub("\\|.*", "", ec)
    end <- Sys.time()
    totaltime <- end-start
    print(paste0(length(unique(matched_res$Empirical.Compound)), " empirical compounds identified in ", totaltime, " seconds."))
  }
  
  fast.write.csv(matched_res, file="mummichog_matched_compound_all.csv", row.names=FALSE);
  qs::qsave(matched_res, "mum_res.qs");
  
  # now update expr. profile
  matched_mz <- matched_res[,1];
  matched_ts <- mSetObj$dataSet$expr_dic[matched_mz];
  
  if(mSetObj$paramSet$mumRT & version=="v2") { # RT need to be in EC space
    # first create ecpd to expression dict
    ec.exp.mat <- data.frame(key=matched_res[,6], value=as.numeric(matched_ts), stringsAsFactors = F)
    ec_exp_dict <- Convert2Dictionary(ec.exp.mat);
    ec.exp.vec <- unlist(lapply(ec_exp_dict, max));
    
    # also need to make cpd_exp_dict for KEGG network view
    exp.mat <- data.frame(key=matched_res[,2], value=as.numeric(matched_ts));
    cpd_exp_dict <- Convert2Dictionary(exp.mat);
    
    # ecpd to cpd dict
    cpd_ecpd_dict <- Convert2Dictionary(matched_res[,c(2,6)])
    ecpd_cpd_dict <- Convert2Dictionary(matched_res[,c(6,2)])
    
    # now mz 2 ecpd dict
    mz2cpd_dict <- Convert2Dictionary(matched_res[,c(1,2)]); #indexed/named by mz
    mz2ec_dict <- Convert2Dictionary(matched_res[,c(1,6)])
    ec2mz_dict <- Convert2Dictionary(matched_res[,c(6,1)])
    
    # save to mSetObj
    mSetObj$ec_exp_dict <- ec_exp_dict
    mSetObj$cpd_exp_dict <- cpd_exp_dict;
    mSetObj$ec_exp <- ec.exp.vec
    mSetObj$mz2cpd_dict <- mz2cpd_dict;
    mSetObj$mz2ec_dict <- mz2ec_dict
    mSetObj$ec2mz_dict <- ec2mz_dict
    mSetObj$ecpd_cpd_dict <- ecpd_cpd_dict
    mSetObj$cpd_ecpd_dict <- cpd_ecpd_dict
    mSetObj$cpd_ecpd_counts <- cpd2ec1
    
    # now do matching to identify significant input_ecpdlist
    refmz <- names(mz2ec_dict)
    hits.index <- which(refmz %in% as.character(mSetObj$dataSet$input_mzlist));
    ec1 <- unique(unlist(mz2ec_dict[hits.index]));
    mSetObj$input_ecpdlist <- ec1;
    mSetObj$total_matched_ecpds <- unique(as.vector(matched_res$Empirical.Compound));

  } else {
    # get the expression profile for each 
    exp.mat <- data.frame(key=matched_res[,2], value=as.numeric(matched_ts));
    cpd_exp_dict <- Convert2Dictionary(exp.mat);
    # create average exp
    exp.vec <- unlist(lapply(cpd_exp_dict, mean));
    
    # now need to get the mapping from mz to compound id (one mz can have 0, 1, or more id hits)
    mz2cpd_dict <- Convert2Dictionary(matched_res[,c(1,2)]); #indexed/named by mz
    cpd2mz_dict <- Convert2Dictionary(matched_res[,c(2,1)]); # indexed/named by id

    # now do matching to identify significant input_cpdlist
    refmz <- names(mz2cpd_dict)
    hits.index <- which(refmz %in% as.character(mSetObj$dataSet$input_mzlist));
    cpd1 <- unique(unlist(mz2cpd_dict[hits.index]));
    
    if(.on.public.web){
        currency_tmp <- currency;
    } else {
        if(!exists("currency_r")){currency_r <- currency}
        currency_tmp <- currency_r;
    }
    
    cpd1 <- cpd1[!(cpd1 %in% currency_tmp)];

    mSetObj$mz2cpd_dict <- mz2cpd_dict;
    mSetObj$cpd_exp_dict <- cpd_exp_dict;
    mSetObj$cpd_exp <- exp.vec;
    mSetObj$cpd2mz_dict <- cpd2mz_dict;
    mSetObj$input_cpdlist <- cpd1;
    mSetObj$total_matched_cpds <- unique(as.vector(matched_res$Matched.Compound));
  }
  
  form.mat <- cbind(matched_res[,2], matched_res[,3]);
  cpd_form_dict <- Convert2Dictionary(form.mat);
  mSetObj$cpd_form_dict <- cpd_form_dict;
  
  return(mSetObj);
}

# version 2
# internal function for searching compound library
.search.compoundLibMeta <- function(mSetObjMeta, cpd.lib, cpd.treep, cpd.treen, metaLevel = "cpd",
                                    combine.level = "pvalue", pval.method = "fisher", es.method = "fixed",
                                    rank.metric = "mean", mutual.feats = TRUE){
  
  metaFiles <- unique(metaFiles)

  metaMsetObj <- vector("list")
  version <- mum.version ##TODO: There must be an error bc mum.version is not global variable any more

  # first do compound mapping
  for(meta_file in seq_along(metaFiles)){
    
    mSetObj <- qs::qread(metaFiles[meta_file])
    ref_mzlist <- as.numeric(mSetObj$dataSet$ref_mzlist);
    print(paste0("compoundLibMeta"));
    print(paste0("Got ", length(ref_mzlist), " mass features."))
    pos_inx <- mSetObj$dataSet$pos_inx;
    ref_mzlistp <- ref_mzlist[pos_inx];
    ref_mzlistn <- ref_mzlist[!pos_inx];
    
    # for empirical compounds
    if(mSetObj$paramSet$mumRT){ 
      ret_time_pos <- mSetObj$dataSet$ret_time[pos_inx];
      ret_time_neg <- mSetObj$dataSet$ret_time[!pos_inx]
      rt_tol <- mSetObj$dataSet$rt_tol
    }else{
      # add fake RT
      ret_time_pos <- rep(1, length(ref_mzlistp))
      ret_time_neg <- rep(1, length(ref_mzlistn))
    }
    
    modified.statesp <- colnames(cpd.lib$mz.matp);
    modified.statesn <- colnames(cpd.lib$mz.matn);
    my.tolsp <- mz_tolerance(ref_mzlistp, mSetObj$dataSet$instrument);
    my.tolsn <- mz_tolerance(ref_mzlistn, mSetObj$dataSet$instrument);
    
    # get mz ladder (pos index)
    self.mzsp <- floor(ref_mzlistp);
    all.mzsp <- cbind(self.mzsp-1, self.mzsp, self.mzsp+1);
    
    self.mzsn <- floor(ref_mzlistn);
    all.mzsn <- cbind(self.mzsn-1, self.mzsn, self.mzsn+1);
    
    # matched_res will contain detailed result (cmpd.id. query.mass, mass.diff) for all mz;
    # use a high-performance variant of list
    matched_resp <- myFastList();
    matched_resn <- myFastList();
    
    if(mSetObj$dataSet$mode != "negative"){
      for(i in seq_along(ref_mzlistp)){
        mz <- ref_mzlistp[i];
        rt <- ret_time_pos[i];
        my.tol <- my.tolsp[i];
        all.mz <- all.mzsp[i,];
        pos.all <- as.numeric(unique(unlist(cpd.treep[all.mz])));
        
        for(pos in pos.all){
          id <- cpd.lib$id[pos];
          mw.all <- cpd.lib$mz.matp[pos,]; #get modified mzs
          diffs <- abs(mw.all - mz); #modified mzs - mz original
          hit.inx <- which(diffs < my.tol);
          if(length(hit.inx)>0){
            for(spot in seq_along(hit.inx)){
              hit.pos <- hit.inx[spot];# need to match all
              index <- paste(mz, id, rt, hit.pos, sep = "_");
              matched_resp$add(index, c(i, id, mz, rt, mw.all[hit.pos], modified.statesp[hit.pos], diffs[hit.pos])); #replaces previous when hit.inx>1
            }
          }
        }
      }
    }
    
    all.mzsn <<- all.mzsn
    
    if(mSetObj$dataSet$mode != "positive"){
      for(i in seq_along(ref_mzlistn)){
        mz <- ref_mzlistn[i];
        rt <- ret_time_neg[i];
        my.tol <- my.tolsn[i];
        all.mz <- all.mzsn[i,];
        pos.all <- as.numeric(unique(unlist(cpd.treen[all.mz])));
        
        for(pos in pos.all){
          id <- cpd.lib$id[pos]; # position of compound in cpd.tree
          mw.all <- cpd.lib$mz.matn[pos,]; #get modified mzs
          diffs <- abs(mw.all - mz); #modified mzs - mz original
          hit.inx <- which(diffs < my.tol);
          if(length(hit.inx)>0){
            for(spot in seq_along(hit.inx)){
              hit.pos <- hit.inx[spot];# need to match all
              index <- paste(mz, id, rt, hit.pos, sep = "_"); #name in fast_list
              matched_resn$add(index, c(i, id, mz, rt, mw.all[hit.pos], modified.statesn[hit.pos], diffs[hit.pos])); #replaces previous when hit.inx>1
            }
          }
        }
      }
    }
    
    # convert to regular list
    if(mSetObj$dataSet$mode == "mixed"){
      
      matched_resn <- matched_resn$as.list();
      matched_resp <- matched_resp$as.list();
      
      neg_matches <- length(matched_resn) > 0
      pos_matches <- length(matched_resp) > 0
      
      if(!neg_matches & !pos_matches){
        msg.vec <<- "No compound matches from upload peak list!"
        return(0)
      }
      
      if(neg_matches){
        matched_resn <- data.frame(matrix(unlist(matched_resn), nrow=length(matched_resn), byrow=T), stringsAsFactors = FALSE);
      }
      
      if(pos_matches){
        matched_resp <- data.frame(matrix(unlist(matched_resp), nrow=length(matched_resp), byrow=T), stringsAsFactors = FALSE);
      }
      
      if(neg_matches & pos_matches){ # both w. matches
        matched_res <- rbind(matched_resp, matched_resn)
      }else if(neg_matches & !pos_matches){ # only neg w. matches
        matched_res <- matched_resn
      }else{ # only pos w. matches
        matched_res <- matched_resp
      }
      
    }else if(mSetObj$dataSet$mode == "positive"){
      matched_resp <- matched_resp$as.list();
      
      if(is.null(unlist(matched_resp))){
        msg.vec <<- "No compound matches from upload peak list!"
        return(0)
      }
      
      matched_resp <- data.frame(matrix(unlist(matched_resp), nrow=length(matched_resp), byrow=T), stringsAsFactors = FALSE);
      matched_res <- matched_resp
    }else{
      matched_resn <- matched_resn$as.list();
      
      if(is.null(unlist(matched_resn))){
        msg.vec <<- "No compound matches from upload peak list!"
        return(0)
      }
      
      matched_resn <- data.frame(matrix(unlist(matched_resn), nrow=length(matched_resn), byrow=T), stringsAsFactors = FALSE);
      matched_res <- matched_resn
    }
    
    # re-order columns for output
    matched_res <- matched_res[, c(3,2,6,7,4)];
    colnames(matched_res) <- c("Query.Mass", "Matched.Compound", "Matched.Form", "Mass.Diff", "Retention.Time");
    
    if(metaLevel == "cpd"){
      fileName = paste0("mummichog_matched_compound_", mSetObj$dataSet$fileName, "_all.csv")
      fast.write.csv(matched_res, file=fileName, row.names=FALSE);
    }
    
    # objects save to meta msetObj
    metafile <- tools::file_path_sans_ext(metaFiles[meta_file])
    metaMsetObj[[metafile]] <- vector("list", 5)
    
    # now update expr. profile
    matched_mz <- matched_res[,1];
    matched_ts <- mSetObj$dataSet$expr_dic[matched_mz];
    
    if(combine.level %in% c("pvalue", "both", "pool")){
      pvals <- mSetObj$dataSet$mummi.proc$p.value
      names(pvals) <- mSetObj$dataSet$mummi.proc$m.z
      matched_pvals <- pvals[matched_mz]
      metaMsetObj[[metafile]]$matched_pvals <- matched_pvals;
    } 
    
    if(combine.level %in% c("es", "both")){
      es <- mSetObj$dataSet$mummi.proc[,c("effect.size", "lower.ci", "upper.ci")]
      rownames(es) <- mSetObj$dataSet$mummi.proc$m.z
      match.inx <- match(matched_mz, rownames(es))
      matched_es <- es[match.inx,]
      metaMsetObj[[metafile]]$matched_es <- matched_es;
    }
    
    metaMsetObj[[metafile]]$mumResTable <- matched_res;
    metaMsetObj[[metafile]]$ref_mzlist <- mSetObj$dataSet$ref_mzlist 
    metaMsetObj[[metafile]]$input_mzlist <- mSetObj$dataSet$input_mzlist
    metaMsetObj[[metafile]]$matched_ts <- matched_ts;
    metaMsetObj[[metafile]]$mumRT <-mSetObj$paramSet$mumRT
  }
  
  # second fill in p-value and effect size information
  mSetObj <- mSetObjMeta;
  
  matched_res <- lapply(metaMsetObj, "[[", "mumResTable")
  matched_res <- data.table::rbindlist(matched_res, idcol = TRUE)
  matched_ts <- unlist(lapply(metaMsetObj, "[[", "matched_ts"))
  matched_res <- cbind(matched_res, matched_ts)
  
  if(combine.level %in% c("pvalue", "both", "pool")){
    matched_pval <- unlist(lapply(metaMsetObj, "[[", "matched_pvals"))
  }else{ # initialize empty
    matched_pval <- rep(NA, length(matched_ts))
  }
  
  if(combine.level %in% c("es", "both")){
    matched_es <- lapply(metaMsetObj, "[[", "matched_es")
    matched_es <- Reduce(rbind, matched_es)
  }else{ # initialize empty
    matched_es <- matrix("1", nrow = length(matched_ts), ncol=3)
    colnames(matched_es) <- c("effect.size", "lower.ci", "upper.ci") 
  }
  
  matched_res <- cbind(matched_res, Matched.Pval = matched_pval, matched_es)
  
  # combine at compound level
  if(metaLevel == "cpd"){
    
    if(mutual.feats){
      matched_res_ag <- aggregate(. ~ Matched.Compound, matched_res, paste, collapse=";")
      
      # keep compounds that only match across all files
      matched <- strsplit(matched_res_ag$.id, ";", fixed=TRUE)
      matched.inx <- vapply(matched, function(x) length(unique(x))==length(metaFiles), logical(1))
      
      if(sum(matched.inx) == 0){
        AddErrMsg("No compounds found across all files!")
        return(0)
      }
      
      matched_res_ag <- matched_res_ag[matched.inx,]
      # undo aggregate
      
      matched_res <- splitstackshape::cSplit(matched_res_ag, c(".id", "Query.Mass", "Matched.Form", "Mass.Diff", "Retention.Time", "matched_ts", 
                                                               "Matched.Pval", "effect.size", "lower.ci", "upper.ci"), 
                                             ";", "long", makeEqual = FALSE, type.convert = "as.character")
      matched_res <- data.table::setDF(matched_res)
      colnames(matched_res)[2] <- "File.Name"
      colnames(matched_res)[7:11] <- c("Matched.Scores", "Matched.Pvalue", "Matched.ES", "Matched.L.CI", "Matched.U.CI")
      
    }else{
      matched_res <- matched_res[, c(3,1,2,4:11)]
      matched_res <- data.table::setDF(matched_res)
      colnames(matched_res)[2] <- "File.Name"
      colnames(matched_res)[7:11] <- c("Matched.Scores", "Matched.Pvalue", "Matched.ES", "Matched.L.CI", "Matched.U.CI")
    }
    
    fast.write.csv(matched_res, file="mummichog_matched_compound_all.csv", row.names=FALSE);
    # or empirical compound combining here!  
  }else{
    
    all_ref_mz <- unlist(lapply(metaMsetObj, "[[", "ref_mzlist"))
    
    matched_res$RT.Rank <- rank(as.numeric(matched_res$Retention.Time), ties.method = "random")
    rt_tol_rank <- length(all_ref_mz) * mSetObj$dataSet$rt_frac
    
    # now create empirical compounds if necessary!
    # 1 compound matches to multiple m/z, filter by RT 
    if(mSetObj$paramSet$mumRT & version=="v2"){
      
      start <- Sys.time()
      empirical.cpd.list <- data.table:::split.data.table(matched_res, by="Matched.Compound", sorted = TRUE); # split all info by compound
      empirical.cpds2cpds <- vector(length=(length(empirical.cpd.list)), "list")
      names(empirical.cpds2cpds) <- names(empirical.cpd.list)
      
      # for each compound, if multiple matches, split into ECpds if > RT tolerance - rt_tol
      for(i in seq_along(empirical.cpd.list)){
        
        id <- empirical.cpd.list[[i]]$.id
        mzs <- empirical.cpd.list[[i]]$Query.Mass
        ions <- empirical.cpd.list[[i]]$Matched.Form
        rts <- as.numeric(empirical.cpd.list[[i]]$Retention.Time)
        rt.rank <- as.numeric(empirical.cpd.list[[i]]$RT.Rank)
        score <- empirical.cpd.list[[i]]$matched_ts
        mass.diff <- empirical.cpd.list[[i]]$Mass.Diff
        p.val <- empirical.cpd.list[[i]]$Matched.Pval
        es <- empirical.cpd.list[[i]]$effect.size
        l.ci <- empirical.cpd.list[[i]]$lower.ci
        u.ci <- empirical.cpd.list[[i]]$upper.ci
        cpds <- names(empirical.cpd.list)[i]
        
        # first, for each compound, determine ECs among matched ions
        if(length(mzs)>1){ # if multiple ECs per compound
          
          # first group together to create empirical cpds by rt
          names(rts) <- paste0(mzs, ";", ions, ";", rts, ";", cpds, ";", id, ";", score,
                               ";", mass.diff, ";", p.val,";", es, ";", l.ci, ";", u.ci)
          rts <- sort(rts)
          
          # second, group together to create empirical cpds by rt rank
          names(rt.rank) <- paste0(mzs, ";", ions, ";", rts, ";", cpds, ";", id, ";", score,
                                   ";", mass.diff, ";", p.val,";", es, ";", l.ci, ";", u.ci)
          rt.rank <- sort(rt.rank)
          
          split.inx <- c(0, cumsum(Reduce("&", list(abs(diff(rts)) > rt_tol, abs(diff(rt.rank)) > rt_tol_rank) )))
          
          # need to deal w. multiple rts but only 1 EC
          if(length(unique(split.inx)) > 1){
            e.cpds <- split(rts, split.inx)
            empirical.cpds2cpds[[i]] <- lapply(e.cpds, names)
          }else{
            empirical.cpds2cpds[[i]] <- paste0(names(rts), collapse="__")
          }
          
        }else{ # if only 1 EC per compound
          empirical.cpds2cpds[[i]] <- paste0(mzs, ";", ions, ";", rts, ";", cpds, ";", id, ";", score,
                                             ";", mass.diff, ";", p.val,";", es, ";", l.ci, ";", u.ci)
        }
      }
      
      initial_ecs <- unlist(empirical.cpds2cpds, recursive=FALSE)
      names(initial_ecs) <- paste0("EC", seq_along(initial_ecs))
      print(paste0(length(initial_ecs), " inital ECs created!"))
      
      # second, merge ECs if same m/z and form - append compounds
      try <- melt(initial_ecs)
      try2 <- strsplit(as.character(try[,1]), split="__", fixed=TRUE) # deals with multiple rts belonging to 1 EC
      try2 <- data.frame(value=unlist(try2), L1 = rep(try$L1, sapply(try2, length)))
      
      info <- strsplit(as.character(try2[,1]), split=";", fixed=TRUE)
      
      df_ecs <- data.frame(ec = as.character(try2[,2]), mz = sapply(info, `[[`, 1), form = sapply(info, `[[`, 2), rt = sapply(info, `[[`, 3), 
                           cpd = sapply(info, `[[`, 4), id = sapply(info, `[[`, 5), score = sapply(info, `[[`, 6), 
                           mass_diff = sapply(info, `[[`, 7), pval = sapply(info, `[[`, 8), es = sapply(info, `[[`, 9),
                           lci = sapply(info, `[[`, 10), uci = sapply(info, `[[`, 11), stringsAsFactors = F)
      
      df_ecs$str_row_inx <- paste(df_ecs$mz, df_ecs$form, df_ecs$rt, sep = "___")
      merged_ecs <- aggregate(. ~ str_row_inx, df_ecs, paste, collapse=";")
      
      # cleaning the df
      # merged_ecs$ec <- sapply(strsplit(merged_ecs$ec, ";"), function(x) unlist(x)[1]) - keep as long name
      merged_ecs$mz <- sapply(strsplit(merged_ecs$mz, ";", fixed=TRUE), function(x) unique(unlist(x)))
      merged_ecs$form <- sapply(strsplit(merged_ecs$form, ";", fixed=TRUE), function(x) unique(unlist(x)))
      merged_ecs$rt <- sapply(strsplit(merged_ecs$rt, ";", fixed=TRUE), function(x) unique(unlist(x)))
      merged_ecs$id <- sapply(strsplit(merged_ecs$id, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";"))
      merged_ecs$score <- sapply(strsplit(merged_ecs$score, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      merged_ecs$mass_diff <- sapply(strsplit(merged_ecs$mass_diff, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      merged_ecs$pval <- sapply(strsplit(merged_ecs$pval, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      merged_ecs$es <- sapply(strsplit(merged_ecs$es, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      merged_ecs$lci <- sapply(strsplit(merged_ecs$lci, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      merged_ecs$uci <- sapply(strsplit(merged_ecs$uci, ";", fixed=TRUE), function(x) paste0(unique(unlist(x)), collapse=";") )
      print(paste0(length(unique(merged_ecs$ec)), " merged ECs identified!"))
      
      # third, check if primary ion is present
      # needs to be per EC!
      if(mSetObj$dataSet$primary_ion=="yes"){
        
        ecs <- unique(merged_ecs$ec)
        
        # function to group ECs and verify if contains primary ion
        new_info <- lapply(ecs, function(x) { 
          new_ec_info <- merged_ecs[which(merged_ecs$ec == x),] # subset merged_ecs to rows containing ECx
          primary.inx <- length(intersect(new_ec_info$form, primary_ions))
          
          if(primary.inx>0){
            new_ec_info <- new_ec_info
          }else{
            new_ec_info <- NULL
          }
          new_ec_info
        })  
        
        final_ecs <- do.call(args=new_info, what=rbind)[,-1]
        
      }else{
        final_ecs <- merged_ecs[,-1]
      }
      
      colnames(final_ecs) <- c("Empirical.Compound", "Query.Mass", "Matched.Form", "Retention.Time", "Matched.Compound", "FileName", "Matched.Scores",
                               "Mass.Diff", "Matched.Pvalue", "Matched.ES", "Matched.L.CI", "Matched.U.CI")
      
      # transform to long format
      cpd_split <- strsplit(as.character(final_ecs$Matched.Compound), ";", fixed=TRUE)
      reps <- pmax(lengths(cpd_split))
      df2 <- final_ecs[rep(1:nrow(final_ecs), reps), c(1:4, 6:12)]
      df2$Matched.Compound <- unlist(mapply(function(x,y) c(x, rep(NA, y)), cpd_split, reps-lengths(cpd_split)))
      df2 <- unique(df2)
      
      # now deal with the fact that if at least one EC overlap, need to count as same EC per compound...
      my_final_cpds <- aggregate(. ~ Matched.Compound, df2, paste, collapse="___")
      my_final_cpds_list <- lapply(split(my_final_cpds$Empirical.Compound, my_final_cpds$Matched.Compound), unlist)
      
      cpd2ec1 <- lapply(seq_along(my_final_cpds_list), function(x) { # function used to make grouping of ecs per cpd
        
        ecs <- unlist(strsplit(my_final_cpds_list[[x]], "___", fixed=TRUE))
        
        if(length(ecs) > 1){
          ecs.list <- as.list(strsplit(ecs, ";", fixed=TRUE))
          library(igraph)
          m = sapply(ecs.list, function(x) sapply(ecs.list, function(y) length(intersect(x,y))>0))
          g = igraph::groups(components(graph_from_adjacency_matrix(m)))
          ecs <- paste0(sapply(g, function(z) paste0(ecs[z], collapse = "|") ), collapse = "___")
        }
        ecs
      })
      
      names(cpd2ec1) <- names(my_final_cpds_list)
      
      update_ecs <- lapply(seq_along(cpd2ec1), function(z) {
        
        ecs.old <- unlist(strsplit(my_final_cpds_list[[z]], "___", fixed=TRUE))
        ecs.new <- unlist(strsplit(cpd2ec1[[z]], "___", fixed=TRUE))
        
        for(i in seq_along(ecs.new)){
          pattern <- ecs.new[i]
          pattern_vec <- unlist(strsplit(pattern, "\\|", fixed=TRUE))
          up.pattern <- paste0(unique(pattern_vec), collapse = "|")
          ecs.old[ ecs.old %in% pattern_vec  ] <- up.pattern
        }
        
        ecs.old <- paste0(ecs.old, collapse = "___")
        ecs.old
      })
      
      updated_ecs <- do.call(rbind, update_ecs)
      my_final_cpds$Empirical.Compound <- updated_ecs
      
      new_dt <- data.table::data.table(my_final_cpds)
      new_dt <- new_dt[, list(Query.Mass = unlist(strsplit(as.character(Query.Mass), "___", fixed=TRUE)), 
                              Matched.Form = unlist(strsplit(as.character(Matched.Form), "___", fixed=TRUE)),
                              Retention.Time = unlist(strsplit(as.character(Retention.Time), "___", fixed=TRUE)),
                              Empirical.Compound = unlist(strsplit(as.character(Empirical.Compound), "___", fixed=TRUE)),
                              File.Name = unlist(strsplit(as.character(FileName), "___", fixed=TRUE)),
                              Matched.Scores = unlist(strsplit(as.character(Matched.Scores), "___", fixed=TRUE)),
                              Mass.Diff = unlist(strsplit(as.character(Mass.Diff), "___", fixed=TRUE)),
                              Matched.Pvalue = unlist(strsplit(as.character(Matched.Pvalue), "___", fixed=TRUE)),
                              Matched.ES = unlist(strsplit(as.character(Matched.ES), "___", fixed=TRUE)),
                              Matched.L.CI = unlist(strsplit(as.character(Matched.L.CI), "___", fixed=TRUE)),
                              Matched.U.CI = unlist(strsplit(as.character(Matched.U.CI), "___", fixed=TRUE))
      ),
      by = Matched.Compound]
      
      matched_res <- data.frame(Query.Mass = new_dt$Query.Mass, Matched.Compound = new_dt$Matched.Compound, Matched.Form = new_dt$Matched.Form, Mass.Diff = new_dt$Mass.Diff,
                                Retention.Time = new_dt$Retention.Time, Matched.Scores = new_dt$Matched.Scores, Matched.Pvalue = new_dt$Matched.Pvalue, 
                                Matched.ES = new_dt$Matched.ES, Matched.L.CI = new_dt$Matched.L.CI, Matched.U.CI = new_dt$Matched.U.CI,
                                Empirical.Compound = new_dt$Empirical.Compound, File.Name = new_dt$File.Name, stringsAsFactors = FALSE)
      
      # make new EC names
      ec <- as.list(matched_res$Empirical.Compound)
      ec.unique <- unique(matched_res$Empirical.Compound)
      ec.new <- paste0("EC000", seq_along(ec.unique))
      
      ec.new <- vapply(seq_along(ec), function(i) { 
        
        inx <- match(ec[[i]], ec.unique)
        ec <- ec.new[inx]
        ec
        
      }, character(1))
      
      matched_res$Empirical.Compound <- gsub("\\|.*", "", ec.new)
      end <- Sys.time()
      totaltime <- end-start
      print(paste0(length(unique(matched_res$Empirical.Compound)), " empirical compounds identified in ", totaltime, " seconds."))
      
      if(mutual.feats){
        # keep empirical compounds that only match across all files
        matched_res <- aggregate(. ~ Empirical.Compound, matched_res, paste, collapse="___")
        matched <- strsplit(matched_res$File.Name, ";|___")
        matched.inx <- vapply(matched, function(x) length(unique(x))==length(metaFiles), logical(1))
        
        if(sum(matched.inx)==0){
          AddErrMsg("No empirical compounds found across all studies!")
          return(0)
        }else if(sum(matched.inx) < 50){
          AddErrMsg("Not enough empirical compounds matched across all studies! Try meta-analysis at a higher level (compound or pathway).")
          return(0)
        }
        
        print(paste0(sum(matched.inx), "matched empirical compounds identified across all studies!"))
        
        matched_res <- matched_res[matched.inx,]
        matched_res <- splitstackshape::cSplit(matched_res, c("Query.Mass", "Matched.Compound", "Matched.Form", "Mass.Diff", "Retention.Time", "Matched.Scores", 
                                                              "Matched.Pvalue", "Matched.ES", "Matched.L.CI", "Matched.U.CI", "File.Name"), 
                                               "___", "long", makeEqual = FALSE, type.convert = "as.character")
        matched_res <- data.table::setDF(matched_res)
      }else{
        matched_res <- matched_res[,c(11,1:10,12)]
        matched_res <- data.table::setDF(matched_res)
      }
      fast.write.csv(matched_res, file="mummichog_matched_compound_postmerge.csv", row.names=FALSE);
    }else{
      AddErrMsg("Meta-analysis at empirical compound level is invalid!")
      return(0)
    }
  }
  
  ref_mzlist <- lapply(metaMsetObj, "[[", "ref_mzlist") 
  ref_mzlist <- unlist(unique(ref_mzlist))
  mSetObj$dataSet$ref_mzlist <- ref_mzlist
  
  mumRT <- unlist(lapply(metaMsetObj, "[[", "mumRT")) 
  qs::qsave(matched_res, "mum_res.qs");
  
  ##############################################
  # COMBINE EITHER P-VALUES
  # EFFECT-SIZES
  # OR BOTH (only for mummichog or integ_peaks)
  # then re-make input_cpdlist and input_ecpdlist
  # gsea uses rank metric only
  
  #  if(anal.type %in% c("mummichog", "integ_peaks")){
  
  if(combine.level %in% c("pvalue", "both")){
    
    if(metaLevel %in% "ec"){
      # merge to empirical compounds
      all_p <- aggregate(. ~ Empirical.Compound, matched_res, paste, collapse="___")
      old_p <- strsplit(all_p$Matched.Pvalue, "___", fixed=TRUE)
      scores <- strsplit(all_p$Matched.Pvalue, ";|___")
      scores <- lapply(scores, function(x) {
        if(length(x) == 1){
          x <- rep(x, 2)
        }
        x;}) 
      
    }else{
      # merge to compounds
      all_p <- aggregate(. ~ Matched.Compound, matched_res, paste, collapse="___")
      old_p <- strsplit(all_p$Matched.Pvalue, "___", fixed=TRUE)
      scores <- strsplit(all_p$Matched.Pvalue, ";|___")
      scores <- lapply(scores, function(x) {
        if(length(x) == 1){
          x <- rep(x, 2)
        }
        x;}) 
    }
    
    # combine p-values
    if(pval.method=="fisher"){
      meta.pvals <- lapply(scores, function(x) sumlog(as.numeric(x)))
    }else if(pval.method=="edgington"){ 
      meta.pvals <- lapply(scores, function(x) sump(as.numeric(x)))
    }else if(pval.method=="stouffer"){
      meta.pvals <- lapply(scores, function(x) sumz(x))
    }else if(pval.method=="vote"){
      meta.pvals <- lapply(scores, function(x) votep(x))
    }else if(pval.method=="min"){
      Meta.P <- lapply(scores, function(x) min(x) )
    }else if(pval.method=="max") {
      Meta.P <- lapply(scores, function(x) max(x) )
    }else{
      AddErrMsg("Invalid meta-analysis method!")
      return(0)
    }
    
    #extract p-values
    if(exists("meta.pvals")){
      Meta.P <- unlist(lapply(meta.pvals, function(z) z["p"]))
    }
    
    Meta.P2 <- rep(Meta.P, vapply(old_p, length, numeric(1)))
    matched_res$Meta.P <- Meta.P2
    #    }
    
    # now create input mzlist - used to create
    # input cpd/ec list
    cutoff <- mSetObj$dataSet$cutoff
    
    if(combine.level == "both"){
      my.inx <- matched_res[,"Meta.P.Both"] < cutoff
    }else if(combine.level == "pvalue"){
      my.inx <- matched_res[,"Meta.P"] < cutoff
    }else{
      my.inx <- matched_res[,"Meta.ES.Pval"] < cutoff
    }
    
    input_mzlist <- unlist(unique(matched_res[as.vector(my.inx), "Query.Mass"]))
    
  }else{ # gsea
    input_mzlist <- lapply(metaMsetObj, "[[", "input_mzlist") 
    input_mzlist <- unlist(unique(input_mzlist)) # will be updated later
  }
  
  sig.size <- length(input_mzlist);
  mSetObj$dataSet$N <- sig.size;
  mSetObj$dataSet$input_mzlist <- input_mzlist
  
  if(all(mumRT) & version=="v2"){ # RT need to be in EC space
    
    # for GSEA
    # need to merge t-scores if same m/z in the data
    if(rank.metric == "mean"){     # default using the mean
      matched_res$Matched.Scores <- vapply(matched_res$Matched.Scores, function(x) mean(as.numeric(unlist(strsplit(as.character(x), ";", fixed=TRUE)))), numeric(1))
    }else if(rank.metric == "min"){
      matched_res$Matched.Scores <- vapply(matched_res$Matched.Scores, function(x) min(as.numeric(unlist(strsplit(as.character(x), ";", fixed=TRUE)))), numeric(1))
    }else if(rank.metric == "max"){
      matched_res$Matched.Scores <- vapply(matched_res$Matched.Scores, function(x) max(as.numeric(unlist(strsplit(as.character(x), ";", fixed=TRUE)))), numeric(1))
    }else if(rank.metric == "median"){
      matched_res$Matched.Scores <- vapply(matched_res$Matched.Scores, function(x) median(as.numeric(unlist(strsplit(as.character(x), ";", fixed=TRUE)))), numeric(1))
    }else{
      AddErrMsg("Invalid method selected for merging scores!")
      return(0)
    }
    
    ec.exp.mat <- data.frame(key=matched_res$Empirical.Compound, value=as.numeric(matched_res$Matched.Scores), stringsAsFactors = F)
    ec_exp_dict <- Convert2Dictionary(ec.exp.mat);
    ec.exp.vec <- unlist(lapply(ec_exp_dict, max));
    
    # also need to make cpd_exp_dict for KEGG network view
    exp.mat <- data.frame(key=matched_res$Matched.Compound, value=as.numeric(matched_res$Matched.Scores), stringsAsFactors = F);
    cpd_exp_dict <- Convert2Dictionary(exp.mat);
    
    # ecpd to cpd dict
    cpd_ecpd_dict <- Convert2Dictionary(matched_res[,c(3,1)])
    ecpd_cpd_dict <- Convert2Dictionary(matched_res[,c(1,3)])
    
    # now mz 2 ecpd dict
    mz2cpd_dict <- Convert2Dictionary(matched_res[,c(2,3)]); #indexed/named by mz
    mz2ec_dict <- Convert2Dictionary(matched_res[,c(2,1)])
    ec2mz_dict <- Convert2Dictionary(matched_res[,c(1,2)])
    
    # save to mSetObj
    mSetObj$ec_exp_dict <- ec_exp_dict
    mSetObj$cpd_exp_dict <- cpd_exp_dict;
    mSetObj$ec_exp <- ec.exp.vec
    mSetObj$mz2cpd_dict <- mz2cpd_dict;
    mSetObj$mz2ec_dict <- mz2ec_dict
    mSetObj$ec2mz_dict <- ec2mz_dict
    mSetObj$ecpd_cpd_dict <- ecpd_cpd_dict
    mSetObj$cpd_ecpd_dict <- cpd_ecpd_dict
    mSetObj$cpd_ecpd_counts <- cpd2ec1
    
    # now do matching to identify significant input_ecpdlist
    # trio.list <- data.frame(mz = names(mz2ec_dict), ec = sapply(mz2ec_dict, paste, collapse="; "), cpd = sapply(mz2cpd_dict, paste, collapse="; "))
    refmz <- names(mz2ec_dict)
    hits.index <- which(refmz %in% as.character(input_mzlist));
    ec1 <- unique(unlist(mz2ec_dict[hits.index]));
    mSetObj$input_ecpdlist <- ec1;
    mSetObj$total_matched_ecpds <- unique(as.vector(matched_res$Empirical.Compound));
    form.mat <- cbind(matched_res[,2], matched_res[,4]);
    
  }else{ # compound level
    
    # get the expression profile for each 
    exp.mat <- data.frame(key=matched_res$Matched.Compound, value=as.numeric(matched_res$Matched.Scores), stringsAsFactors = F);
    cpd_exp_dict <- Convert2Dictionary(exp.mat);
    # create average exp
    exp.vec <- unlist(lapply(cpd_exp_dict, function(x) mean(unlist(x))));
    
    # now need to get the mapping from mz to compound id (one mz can have 0, 1, or more id hits)
    mz2cpd_dict <- Convert2Dictionary(matched_res[ , c("Query.Mass", "Matched.Compound")]) #indexed/named by mz
    cpd2mz_dict <- Convert2Dictionary(matched_res[ , c("Matched.Compound", "Query.Mass")]) # indexed/named by id
    
    # now do matching to identify significant input_cpdlist
    refmz <- names(mz2cpd_dict)
    hits.index <- which(refmz %in% as.character(input_mzlist));
    cpd1 <- unique(unlist(mz2cpd_dict[hits.index]));
        
    if(.on.public.web){
        currency_tmp <- currency;
    } else {
        if(!exists("currency_r")){currency_r <- currency}
        currency_tmp <- currency_r;
    }
    
    cpd1 <- cpd1[!(cpd1 %in% currency_tmp)];
    form.mat <- cbind(matched_res$Query.Mass, matched_res$Matched.Form);
    
    mSetObj$mz2cpd_dict <- mz2cpd_dict;
    mSetObj$cpd_exp_dict <- cpd_exp_dict;
    mSetObj$cpd_exp <- exp.vec;
    mSetObj$cpd2mz_dict <- cpd2mz_dict;
    mSetObj$input_cpdlist <- cpd1;
    mSetObj$dataSet$N <- length(input_mzlist);
    mSetObj$total_matched_cpds <- unique(as.vector(matched_res$Matched.Compound));
  }
  
  cpd_form_dict <- Convert2Dictionary(form.mat);
  mSetObj$cpd_form_dict <- cpd_form_dict;
  return(mSetObj);
}

#' @param method If "cohen", computes Cohen's d, if "hedges",
#' computes Hegdes' g effect size statistics.
#' @noRd
CalculateEffectSize <- function(mSetObj=NA, paired=FALSE, method="cohen"){
  
  mSetObj <- .get.mSet(mSetObj);
  
  inx1 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[1])
  inx2 <- which(mSetObj$dataSet$cls==levels(mSetObj$dataSet$cls)[2])
  
  # samples in row, features in columns
  x <- mSetObj$dataSet$norm[inx1,]
  y <- mSetObj$dataSet$norm[inx2,]
  
  library(effsize)
  
  my.fun <- function(x) {
    
    if(method == "cohen"){
      tmp <- try(cohen.d(x[inx1], x[inx2], paired=paired, hedges.correction=FALSE));
    }else{
      tmp <- try(cohen.d(x[inx1], x[inx2], paired=paired, hedges.correction=TRUE));
    }
    
    if(class(tmp) == "try-error") {
      return(c(NA, NA, NA, NA));
    }else{
      return(c(tmp$estimate, tmp$sd, tmp$conf.int));
    }
  }
  
  results <- apply(as.matrix(mSetObj$dataSet$norm), 2, my.fun)
  rownames(results) <- c("effect_size", "win_group_stdev", "lower_ci", "upper_ci")
  
  mSetObj$analSet$effect.size <- t(results);
  return(.set.mSet(mSetObj));
}

# Internal function for permutation, no RT
.perform.mummichogPermutations <- function(mSetObj, permNum=100){

  print(paste('Resampling, ', permNum, 'permutations to estimate background ...'));
  permutation_hits <- permutation_record <- vector("list", permNum);
  matched_res <- qs::qread("mum_res.qs");
  set.seed(123)
  for(i in 1:permNum){ # for each permutation, create list of input compounds and calculate pvalues for each pathway
    input_mzlist <- sample(mSetObj$dataSet$ref_mzlist, mSetObj$dataSet$N)
    t <- make_cpdlist(mSetObj, input_mzlist);
    perm <- ComputeMummichogPermPvals(t, mSetObj$total_matched_cpds, mSetObj$pathways, matched_res, input_mzlist, mSetObj$cpd2mz_dict);
    permutation_record[[i]] <- perm[1]
    permutation_hits[[i]] <- perm[2]
  }

  # append new info
  mSetObj$perm_record <- permutation_record;
  mSetObj$perm_hits <- permutation_hits;
  return(mSetObj);
}

# Calculate p-values for each Lperm
# Used in higher mummichogR functions w.out RT
ComputeMummichogPermPvals <- function(input_cpdlist, total_matched_cpds, pathways, matches.res, input_mzlist, cpd2mz_dict){
  
  ora.vec <- input_cpdlist; #Lperm
  query_set_size <- length(ora.vec)
  current.mset <- pathways$cpds; #all
  total_cpds <- unique(total_matched_cpds) #matched compounds
  total_feature_num <- length(total_cpds)
  
  size <- negneg <- vector(mode="list", length=length(current.mset));
  
  cpds <- lapply(current.mset, function(x) intersect(x, total_cpds)); # pathways & all ref cpds
  feats <- lapply(current.mset, function(x) intersect(x, ora.vec)); #pathways & lsig
  feat_len <- unlist(lapply(feats, length)); # length of overlap features
  set.num <- unlist(lapply(cpds, length)); #cpdnum
  
  negneg <- sizes <- vector(mode="list", length=length(current.mset));
  
  for(i in seq_along(current.mset)){ # for each pathway
    sizes[[i]] <- min(feat_len[i], count_cpd2mz(cpd2mz_dict, unlist(feats[i]), input_mzlist))
    negneg[[i]] <- total_feature_num + sizes[[i]] - set.num[i] - query_set_size;
  }
  
  unsize <- as.integer(unlist(sizes))
  res.mat <- matrix(0, nrow=length(current.mset), ncol=1)
  fishermatrix <- cbind(unsize-1, set.num, (query_set_size + unlist(negneg)), query_set_size)
  res.mat[,1] <- apply(fishermatrix, 1, function(x) phyper(x[1], x[2], x[3], x[4], lower.tail=FALSE));
  perm_records <- list(res.mat, as.matrix(unsize));
  return(perm_records);
}

# Internal function for permutation
.perform.mummichogRTPermutations <- function(mSetObj, permNum){

  print(paste('Resampling, ', permNum, 'permutations to estimate background ...'));
  permutation_hits <- permutation_record <- vector("list", permNum);
  matched_res <- qs::qread("mum_res.qs");
  set.seed(123)
  for(i in 1:permNum){ # for each permutation, create list of input emp compounds and calculate pvalues for each pathway
    input_mzlist <- sample(mSetObj$dataSet$ref_mzlist, mSetObj$dataSet$N)
    t <- make_ecpdlist(mSetObj, input_mzlist);
    perm <- ComputeMummichogRTPermPvals(t, mSetObj$total_matched_ecpds, mSetObj$pathways, matched_res, input_mzlist);
    permutation_record[[i]] <- perm[1]
    permutation_hits[[i]] <- perm[2]
  }
  
  # append new info
  mSetObj$perm_record <- permutation_record;
  mSetObj$perm_hits <- permutation_hits;
  return(mSetObj);
}

# Calculate p-values for each Lperm
# Used in higher mummichogR functions w. RT
ComputeMummichogRTPermPvals <- function(input_ecpdlist, total_matched_ecpds, pathways, matches.res, input_mzlist){
  
  ora.vec <- input_ecpdlist; #Lperm
  query_set_size <- length(ora.vec) # query set size
  current.mset <- pathways$emp_cpds; #all
  total_ecpds <- unique(total_matched_ecpds) # matched number of empirical compounds
  total_feature_num <- length(total_ecpds)
  
  size <- negneg <- vector(mode="list", length=length(current.mset));
  
  ecpds <- lapply(current.mset, function(x) intersect(x, total_ecpds)); # pathways & all ref ecpds
  feats <- lapply(current.mset, function(x) intersect(x, ora.vec)); #pathways & query ecpds (perm lsig)
  feat_len <- unlist(lapply(feats, length)); # length of overlap features
  set.num <- unlist(lapply(ecpds, length)); #cpdnum
  
  negneg <- sizes <- vector(mode="list", length=length(current.mset));
  
  for(i in seq_along(current.mset)){ # for each pathway
    sizes[[i]] <- feat_len[i] # for ecs, just use length of overlap feats - overlap_size
    negneg[[i]] <- total_feature_num + sizes[[i]] - set.num[i] - query_set_size;
  }
  
  unsize <- as.integer(unlist(sizes))
  res.mat <- matrix(0, nrow=length(current.mset), ncol=1)
  fishermatrix <- cbind(unsize-1, set.num, (query_set_size + unlist(negneg)), query_set_size)
  res.mat[,1] <- apply(fishermatrix, 1, function(x) phyper(x[1], x[2], x[3], x[4], lower.tail=FALSE));
  perm_records <- list(res.mat, as.matrix(unsize));
  return(perm_records);
}

# Internal function for significant p value 
.compute.mummichogSigPvals <- function(mSetObj){
  
  qset <- unique(unlist(mSetObj$input_cpdlist)); #Lsig ora.vec
  query_set_size <- length(qset); #q.size
  
  total_cpds <- unique(mSetObj$total_matched_cpds) #all matched compounds
  total_feature_num <- length(total_cpds)
  
  current.mset <- mSetObj$pathways$cpds; #all compounds per pathway
  path.num <- unlist(lapply(current.mset, length));
  
  cpds <- lapply(current.mset, function(x) intersect(x, total_cpds)); #pathways & all ref cpds
  set.num <- unlist(lapply(cpds, length)); #cpdnum
  
  feats <- lapply(current.mset, function(x) intersect(x, qset)); #pathways & lsig
  feat_len <- unlist(lapply(feats, length)); # length of overlap features
  feat_vec <- sapply(feats, function(x) paste(x, collapse=";"))
  
  negneg <- sizes <- vector(mode="list", length=length(current.mset)); #empty lists
  
  for(i in seq_along(current.mset)){ # for each pathway
    sizes[[i]] <- min(feat_len[i], count_cpd2mz(mSetObj$cpd2mz_dict, unlist(feats[i]), mSetObj$dataSet$input_mzlist)) #min overlap or mz hits
    negneg[[i]] <- total_feature_num + sizes[[i]] - set.num[i] - query_set_size; # failure in left part
  }
  
  #error fixing for negatives, problem occurs when total_feat_num and query_set_size too close (lsig too close to lall)
  negneg <- rapply(negneg, function(x) ifelse(x<0,0,x), how = "replace") 
  
  unsize <- as.integer(unlist(sizes));
  
  uniq.count <- length(unique(unlist(current.mset, use.names = FALSE)));
  
  # prepare for the result table
  res.mat <- matrix(0, nrow=length(current.mset), ncol=8);
  
  #fishermatrix for phyper
  fishermatrix <- cbind(unsize-1, set.num, (query_set_size + unlist(negneg) - unsize), query_set_size); 
  first <- unlist(lapply(sizes, function(x) max(0, x-1)));
  easematrix <- cbind(first, (set.num - unsize + 1), (query_set_size - unsize), unlist(negneg)); 
  
  res.mat[,1] <- path.num;  
  res.mat[,2] <- set.num;
  res.mat[,3] <- unsize;
  res.mat[,4] <- query_set_size*(path.num/uniq.count); #expected
  res.mat[,5] <- apply(fishermatrix, 1, function(x) phyper(x[1], x[2], x[3], x[4], lower.tail=FALSE));
  res.mat[,6] <- apply(easematrix, 1, function(x) fisher.test(matrix(x, nrow=2), alternative = "greater")$p.value);
  res.mat[,7] <- set.num;
  res.mat[,8] <- unsize;
  colnames(res.mat) <- c("Pathway total", "Hits.total", "Hits.sig", "Expected", "FET", "EASE","Total","Sig");
  rownames(res.mat) <- mSetObj$pathways$name
  
  mSetObj$pvals <- res.mat;
  permutations_hits <- matrix(unlist(mSetObj$perm_hits), nrow=length(mSetObj$perm_hits), byrow=TRUE);
  sig_hits <- res.mat[,3]; # sighits
  sigpvalue <- res.mat[,6]; # EASE scores
  
  perm_record <- unlist(mSetObj$perm_record);
  perm_minus <- abs(0.9999999999 - perm_record);
  
  if(length(sig_hits[sig_hits!=0]) < round(length(sig_hits)*0.05)){ # too few hits that can't calculate gamma dist!
    if(!exists("adjustedp")){
      adjustedp <- rep(NA, length = length(res.mat[,1]))
    }
    res.mat <- cbind(res.mat, Gamma=adjustedp);
  }else{
    tryCatch({
      fit.gamma <- fitdistrplus::fitdist(perm_minus, distr = "gamma", method = "mle", lower = c(0, 0), start = list(scale = 1, shape = 1));
      rawpval <- as.numeric(sigpvalue);
      adjustedp <- 1 - (pgamma(1-rawpval, shape = fit.gamma$estimate["shape"], rate = fit.gamma$estimate["scale"]));
    }, error = function(e){
      if(mSetObj$dataSet$mum.type == "table"){
        if(!exists("adjustedp")){
          adjustedp <- rep(NA, length = length(res.mat[,1]))
        }
        res.mat <- cbind(res.mat, Gamma=adjustedp);
      }
      print(e)   
    }, finally = {
      if(!exists("adjustedp")){
        adjustedp <- rep(NA, length = length(res.mat[,1]))
      }
      res.mat <- cbind(res.mat, Gamma=adjustedp);
    })
  }
  
  #calculate empirical p-values
  record <- mSetObj$perm_record
  fisher.p <- as.numeric(res.mat[,5])
  
  #pathway in rows, perms in columns
  record_matrix <- do.call(cbind, do.call(cbind, record))
  num_perm <- ncol(record_matrix)
  
  #number of better hits for web
  better.hits <- sapply(seq_along(record_matrix[,1]), function(i) sum(record_matrix[i,] <= fisher.p[i])  )
  
  #account for a bias due to finite sampling - Davison and Hinkley (1997)
  emp.p <- sapply(seq_along(record_matrix[,1]), function(i) (sum(record_matrix[i,] <= fisher.p[i])/num_perm) )
  
  res.mat <- cbind(res.mat, Emp.Hits=better.hits, Empirical=emp.p, Cpd.Hits = feat_vec)
 
  # remove those no hits
  hit.inx <- as.numeric(as.character(res.mat[,3])) > 0;
  res.mat <- res.mat[hit.inx, , drop=FALSE];
  
  if(nrow(res.mat) <= 1){
    AddErrMsg("Not enough m/z to compound hits for pathway analysis!")
    return(0)
  }
  
  # prepare json element for network
  hits.all <- cpds[hit.inx];
  hits.sig <- feats[hit.inx];  
  path.nms <- mSetObj$pathways$name[hit.inx];
  
  # order by p-values
  ord.inx <- order(res.mat[,9]);
  Cpd.Hits <- res.mat[ord.inx, 12]
  res.mat <- signif(apply(as.matrix(res.mat[ord.inx, 1:11, drop=FALSE]), 2, as.numeric), 5);
  rownames(res.mat) <- path.nms[ord.inx];
  
  .save.mummichog.restable(res.mat, Cpd.Hits, mSetObj$mum_nm_csv);
  if(is.null(mSetObj$initPSEA) || mSetObj$initPSEA){
    mSetObj$mummi.resmat <- res.mat[,-11]; # not using adjusted for display other computing
    mSetObj$paramSet$mummi.lib <- mSetObj$lib.organism;
  }

  mSetObj$path.nms <- path.nms[ord.inx]
  mSetObj$path.hits <- convert2JsonList(hits.all[ord.inx])
  mSetObj$path.pval <- as.numeric(res.mat[,9])
  matched_res <- qs::qread("mum_res.qs");
  
  json.res <- list(
    cmpd.exp = mSetObj$cpd_exp,
    path.nms = path.nms[ord.inx],
    hits.all = convert2JsonList(hits.all[ord.inx]),
    hits.all.size = as.numeric(res.mat[,2]),
    hits.sig = convert2JsonList(hits.sig[ord.inx]),
    hits.sig.size = as.numeric(res.mat[,3]),
    fisher.p = as.numeric(res.mat[,5]),
    gamma.p = as.numeric(res.mat[,9]),
    peakToMet = mSetObj$cpd_form_dict,
    peakTable = matched_res
  );
  
  json.mat <- RJSONIO::toJSON(json.res);
  sink(mSetObj$mum_nm);
  cat(json.mat);
  sink();

        if(is.null(mSetObj$imgSet$enrTables)){
            mSetObj$imgSet$enrTables <- list();
        }
        vis.type <- "mumEnr";
        resTable <- res.mat[,c(2,3,5,9)];
        resTable <- cbind(Name=path.nms[ord.inx], res.mat);
        mSetObj$imgSet$enrTables[[vis.type]] <- list();
        mSetObj$imgSet$enrTables[[vis.type]]$table <- resTable;
        mSetObj$imgSet$enrTables[[vis.type]]$library <- mSetObj$lib.organism;
        mSetObj$imgSet$enrTables[[vis.type]]$algo <- "Mummichog Analysis";    
  return(mSetObj);
}

# Internal function for significant p value with RT
.compute.mummichogRTSigPvals <- function(mSetObj){
  qset <- unique(unlist(mSetObj$input_ecpdlist)); #Lsig ora.vec
  query_set_size <- length(qset); #q.size
  input_cpd <- unique(unlist(mSetObj$ecpd_cpd_dict[qset]));

  total_ecpds <- unique(mSetObj$total_matched_ecpds) #all matched compounds
  total_feature_num <- length(total_ecpds)
  total_cpds <- unique(unlist(mSetObj$ecpd_cpd_dict));
  
  current.mset <- mSetObj$pathways$emp_cpds; #all compounds per pathway
  path.num <- unlist(lapply(current.mset, length));
  
  ecpds <- lapply(current.mset, function(x) intersect(x, total_ecpds)); #pathways & all ref ecpds
  set.num <- unlist(lapply(ecpds, length)); # total ecpd num in pathway
  cpd.num<- unlist(lapply(lapply(mSetObj$pathways$cpds, function(x) intersect(x, total_cpds)), length)); 

  feats <- lapply(current.mset, function(x) intersect(x, qset)); #pathways & lsig
  feat_len <- unlist(lapply(feats, length)); # length of overlap features
  feat_vec <- sapply(feats, function(x) paste(x, collapse=";"))
  cpd_len<- lapply(mSetObj$pathways$cpds, function(x) intersect(x, input_cpd))

  negneg <- sizes <- vector(mode="list", length=length(current.mset)); #empty lists
  
  for(i in seq_along(current.mset)){ # for each pathway
    sizes[[i]] <- feat_len[i] # overlap size
    negneg[[i]] <- total_feature_num + sizes[[i]] - set.num[i] - query_set_size; # failure in left part
  }
  
  #error fixing for negatives, problem occurs when total_feat_num and query_set_size too close (lsig too close to lall)
  negneg <- rapply(negneg, function(x) ifelse(x<0,0,x), how = "replace") 
  
  unsize <- as.integer(unlist(sizes));
  
  uniq.count <- length(unique(unlist(current.mset, use.names = FALSE)));
  
  # prepare for the result table
  res.mat <- matrix(0, nrow=length(current.mset), ncol=8);
  
  #fishermatrix for phyper
  fishermatrix <- cbind(unsize-1, set.num, (query_set_size + unlist(negneg) - unsize), query_set_size);
  first <- unlist(lapply(sizes, function(x) max(0, x-1)));
  easematrix <- cbind(first, (set.num - unsize + 1), (query_set_size - unsize), unlist(negneg)); 
 

  res.mat[,1] <- unlist(lapply(mSetObj$pathways$cpds, length));  
  res.mat[,2] <- cpd.num;
  res.mat[,3] <- as.integer(unlist(lapply(cpd_len, length)));
  res.mat[,4] <- query_set_size*(path.num/uniq.count); #expected
  res.mat[,5] <- apply(fishermatrix, 1, function(x) phyper(x[1], x[2], x[3], x[4], lower.tail=FALSE));
  res.mat[,6] <- apply(easematrix, 1, function(x) fisher.test(matrix(x, nrow=2), alternative = "greater")$p.value);
  res.mat[,7] <- set.num
  res.mat[,8] <- unsize

  colnames(res.mat) <- c("Pathway total", "Hits.total", "Hits.sig", "Expected", "FET", "EASE","Total.EC","Sig.EC");
  rownames(res.mat) <- mSetObj$pathways$name
  
  mSetObj$pvals <- res.mat;
  permutations_hits <- matrix(unlist(mSetObj$perm_hits), nrow=length(mSetObj$perm_hits), byrow=TRUE);
  sig_hits <- unsize; # sighits
  sigpvalue <- res.mat[,5]; # EASE scores
  
  perm_record <- unlist(mSetObj$perm_record);
  perm_minus <- abs(0.9999999999 - perm_record);
  
  if(length(sig_hits[sig_hits!=0]) < round(length(sig_hits)*0.05)){ # too few hits that can't calculate gamma dist!
    if(!exists("adjustedp")){
      adjustedp <- rep(NA, length = length(res.mat[,1]))
    }
    res.mat <- cbind(res.mat, Gamma=adjustedp);
  }else{
    tryCatch({
      fit.gamma <- fitdistrplus::fitdist(perm_minus, distr = "gamma", method = "mle", lower = c(0, 0), start = list(scale = 1, shape = 1));
      rawpval <- as.numeric(sigpvalue);
      adjustedp <- 1 - (pgamma(1-rawpval, shape = fit.gamma$estimate["shape"], rate = fit.gamma$estimate["scale"]));
    }, error = function(e){
      if(mSetObj$dataSet$mum.type == "table"){
        if(!exists("adjustedp")){
          adjustedp <- rep(NA, length = length(res.mat[,1]))
        }
        res.mat <- cbind(res.mat, Gamma=adjustedp);
      }
      print(e)   
    }, finally = {
      if(!exists("adjustedp")){
        adjustedp <- rep(NA, length = length(res.mat[,1]))
      }
      res.mat <- cbind(res.mat, Gamma=adjustedp);
    })
  }
  
  #calculate empirical p-values
  record <- mSetObj$perm_record


  fisher.p <- as.numeric(res.mat[,5])
  
  #pathway in rows, perms in columns
  record_matrix <- do.call(cbind, do.call(cbind, record))
num_perm <- ncol(record_matrix)

#number of better hits for web
better.hits <- sapply(seq_along(record_matrix[,1]), function(i) sum(record_matrix[i,] <= fisher.p[i])  )

#account for a bias due to finite sampling - Davison and Hinkley (1997)
emp.p <- sapply(seq_along(record_matrix[,1]), function(i) (sum(record_matrix[i,] <= fisher.p[i])/num_perm) )

res.mat <- cbind(res.mat, Emp.Hits = better.hits, Empirical = emp.p, EC.Hits = feat_vec)

# remove pathways with no hits
hit.inx <- as.numeric(as.character(res.mat[,8])) > 0;
res.mat <- res.mat[hit.inx, , drop=FALSE];

if(nrow(res.mat) <= 1){
  AddErrMsg("Not enough m/z to compound hits for pathway analysis! Try Version 1 (no RT considerations)!")
  return(0)
}

# prepare json element for network
# need to convert ecpds to cpds
# and get average expression based on ec
cpds <- lapply(ecpds, function(x) unique(unlist(mSetObj$ecpd_cpd_dict[match(x, names(mSetObj$ecpd_cpd_dict))])) )  
cpd.exp.vec <- sapply(ecpds, function(x) mean(mSetObj$ec_exp[match(x, names(mSetObj$ec_exp))]) )
cpds_feats <- lapply(feats, function(x) unique(unlist(mSetObj$ecpd_cpd_dict[match(x, names(mSetObj$ecpd_cpd_dict))])) )  

# now make exp vec for all compounds
cpds2ec <- mSetObj$cpd_ecpd_dict
cpds.all <- unique(unlist(mSetObj$ecpd_cpd_dict[match(total_ecpds, names(mSetObj$ecpd_cpd_dict))]))
cpd.exp.vec <- sapply(cpds.all, function(x) sapply(seq_along(x), function(i) mean(mSetObj$ec_exp[match(unique(unlist(cpds2ec[match(x[[i]], names(cpds2ec))])), names(mSetObj$ec_exp))]) ) )

hits.all <- cpds[hit.inx];
hits.sig <- cpds_feats[hit.inx];  
path.nms <- mSetObj$pathways$name[hit.inx];

# order by p-values
if(length(na.omit(res.mat[,9])) == 0){
  ord.inx <- order(res.mat[,5]); # order by FET if gamma not able to be calc
}else{
  ord.inx <- order(res.mat[,9]); # order by gamma
}

EC.Hits = res.mat[ord.inx, 12]
res.mat <- signif(apply(as.matrix(res.mat[ord.inx, 1:11]), 2, as.numeric), 5); # loop through columns and keep rownames
rownames(res.mat) <- path.nms[ord.inx]

.save.mummichog.restable(res.mat, EC.Hits, mSetObj$mum_nm_csv);

if(is.null(mSetObj$initPSEA) || mSetObj$initPSEA){
    mSetObj$mummi.resmat <- res.mat[,-11];
    mSetObj$paramSet$mummi.lib <- mSetObj$lib.organism;
}
mSetObj$path.nms <- path.nms[ord.inx]
mSetObj$path.hits <- convert2JsonList(hits.all[ord.inx])
mSetObj$path.pval <- as.numeric(res.mat[,5])
matched_res <- qs::qread("mum_res.qs");

json.res <- list(
  cmpd.exp = cpd.exp.vec,
  path.nms = path.nms[ord.inx],
  hits.all = convert2JsonList(hits.all[ord.inx]),
  hits.all.size = as.numeric(res.mat[,7]),
  hits.sig = convert2JsonList(hits.sig[ord.inx]),
  hits.sig.size = as.numeric(res.mat[,8]),
  fisher.p = as.numeric(res.mat[,5]),
  gamma.p = as.numeric(res.mat[,9]),
  peakToMet = mSetObj$cpd_form_dict,
  peakTable = matched_res,
  pathIDs = names(hits.all)
);

  json.mat <- RJSONIO::toJSON(json.res);
  sink(mSetObj$mum_nm);
  cat(json.mat);
  sink();

    if(is.null(mSetObj$imgSet$enrTables)){
        mSetObj$imgSet$enrTables <- list();
    }
    vis.type <- "mumEnr";
    resTable <- cbind(Name=path.nms[ord.inx], res.mat[,c(7,8,5,9)]);
    mSetObj$imgSet$enrTables[[vis.type]] <- list();
    mSetObj$imgSet$enrTables[[vis.type]]$table <- resTable;
    mSetObj$imgSet$enrTables[[vis.type]]$library <- mSetObj$lib.organism;
    mSetObj$imgSet$enrTables[[vis.type]]$algo <- "Mummichog RT analysis";  

  return(mSetObj);
}

  # add adj p values for FET, EASE, and Gamma, and save the complete result table

.save.mummichog.restable <- function(my.res.mat, cpd.hits, file.name){
 
  adj.p.fet <- p.adjust(my.res.mat[,"FET"]);
  adj.p.ease <- p.adjust(my.res.mat[,"EASE"]);
  adj.p.gamma <-  p.adjust(my.res.mat[,"Gamma"]); 
  my.res.mat <- cbind(my.res.mat, AdjP.Fisher=adj.p.fet, AdjP.EASE=adj.p.ease, AdjP.Gamma=adj.p.gamma);
  my.res.mat <- my.res.mat[,-c(7,8)]; 
  my.res.mat <- cbind(my.res.mat, paste0("P", seq.int(1, nrow(my.res.mat))));
  colnames(my.res.mat)[ncol(my.res.mat)] = "Pathway Number";
  colnames(my.res.mat)[which(colnames(my.res.mat) == "FET")] <- "P(Fisher)";
  colnames(my.res.mat)[which(colnames(my.res.mat) == "EASE")] <- "P(EASE)";
  colnames(my.res.mat)[which(colnames(my.res.mat) == "Gamma")] <- "P(Gamma)";
  my.res.mat <- cbind(my.res.mat, cpd.hits);

  fast.write.csv(my.res.mat, file=file.name, row.names=TRUE);

}

## Internal function for calculating GSEA, no RT
.compute.mummichog.fgsea <- function(mSetObj, permNum){
  num_perm <- permNum;
  total_cpds <- mSetObj$cpd_exp #scores from all matched compounds
  
  current.mset <- mSetObj$pathways$cpds; #all compounds per pathway
  names(current.mset) <- mSetObj$pathways$name
  path.size <- unlist(lapply(mSetObj$pathways$cpds, length)) #total size of pathways
  
  df.scores <- data.frame(id=names(total_cpds), scores=total_cpds)
  ag.scores <- aggregate(id ~ scores, data = df.scores, paste, collapse = "; ")
    
  ag.sorted <- ag.scores[order(-ag.scores$scores),]
  row.names(ag.sorted) <- NULL
  
  dt.scores <- data.table::data.table(ag.sorted)
  dt.scores.out <- dt.scores[, list(scores=scores, 
                                    id = unlist(strsplit(id, "; ", fixed = TRUE))), 
                             by=1:nrow(dt.scores)]
  
  rank.vec <- as.numeric(dt.scores.out$nrow);
  names(rank.vec) <- as.character(dt.scores.out$id);
   
  scores.vec <- as.numeric(ag.sorted$scores)
  names(scores.vec) <- as.character(ag.sorted$id)

  # run fgsea
  if(mSetObj$paramSet$mumDataContainsPval == 0){
    rank.vec = seq.int(1, length(mSetObj$cpd_exp))
    names(rank.vec) <- names(mSetObj$cpd_exp)
    scores.vec = seq.int(1, length(mSetObj$cpd_exp))
    names(scores.vec) <- names(mSetObj$cpd_exp)
  }

  fgseaRes <- fgsea2(mSetObj, current.mset, scores.vec, rank.vec, num_perm)  
  res.mat <- matrix(0, nrow=length(fgseaRes$pathway), ncol=5)

  path.size <- unlist(lapply(current.mset, length))
  matched.size <- path.size[match(fgseaRes$pathway, names(path.size))]

  # create result table
  res.mat[,1] <- matched.size;
  res.mat[,2] <- fgseaRes$size;
  res.mat[,3] <- fgseaRes$pval;
  res.mat[,4] <- fgseaRes$padj;
  res.mat[,5] <- fgseaRes$NES;
  
  rownames(res.mat) <- fgseaRes$pathway;
  colnames(res.mat) <- c("Pathway_Total", "Hits", "P_val", "P_adj", "NES");
   
  # order by p-values
  ord.inx <- order(res.mat[,3]);
  res.mat <- signif(as.matrix(res.mat[ord.inx, ]), 4);

  if(is.null(mSetObj$initPSEA) || mSetObj$initPSEA){
    print("mSetObj$paramSet");
    mSetObj$mummi.gsea.resmat <- res.mat;
    mSetObj$paramSet$gsea.lib <- mSetObj$lib.organism;

  }

  Cpd.Hits <- qs::qread("pathwaysFiltered.qs")
  Cpd.Hits <- unlist(lapply(seq_along(Cpd.Hits), function(i) paste(names(Cpd.Hits[[i]]), collapse = ";")))
  Cpd.Hits <- Cpd.Hits[Cpd.Hits != ""]
  
  res.mat <- cbind(res.mat, Cpd.Hits[ord.inx])
  fast.write.csv(res.mat, file=mSetObj$mum_nm_csv, row.names=TRUE);
  
  matched_cpds <- names(mSetObj$cpd_exp)
  inx2<- stats::na.omit(match(rownames(res.mat), mSetObj$pathways$name))
  filt_cpds <- lapply(inx2, function(f) {mSetObj$pathways$cpds[f]})
  
  cpds <- lapply(filt_cpds, function(x) intersect(unlist(x), matched_cpds))
  mSetObj$path.nms <- rownames(res.mat)
  mSetObj$path.hits<- convert2JsonList(cpds)
  mSetObj$path.pval <- as.numeric(res.mat[,3])
  json.res <- list(cmpd.exp = total_cpds,
                   path.nms = rownames(res.mat),
                   hits.all = convert2JsonList(cpds),
                   hits.all.size = as.numeric(res.mat[,2]),
                   nes = fgseaRes$NES,
                   fisher.p = as.numeric(res.mat[,3]))
  
  json.mat <- RJSONIO::toJSON(json.res);
  sink(mSetObj$mum_nm);
  cat(json.mat);
  sink();

  return(mSetObj);
}

#' Internal function for calculating GSEA, with RT
#' @noRd
#' @importFrom stats aggregate
.compute.mummichog.RT.fgsea <- function(mSetObj, permNum){

  #Declare variable
  scores <- NULL;
  
  # Need to perform in EC space
  num_perm <- permNum;
  total_ecpds <- mSetObj$ec_exp #scores from all matched compounds
  
  current.mset <- mSetObj$pathways$emp_cpds; #all compounds per pathway
  names(current.mset) <- mSetObj$pathways$name
  path.size <- unlist(lapply(mSetObj$pathways$ecpds, length)) #total size of pathways
  
  df.scores <- data.frame(id=names(total_ecpds), scores=total_ecpds)
  ag.scores <- aggregate(id ~ scores, data = df.scores, paste, collapse = "; ")
  
  ag.sorted <- ag.scores[order(-ag.scores$scores),]
  row.names(ag.sorted) <- NULL
  
  dt.scores <- data.table::data.table(ag.sorted)
  dt.scores.out <- dt.scores[, list(scores=scores, id = unlist(strsplit(id, "; ", fixed = TRUE))), by=1:nrow(dt.scores)]
  
  rank.vec <- as.numeric(dt.scores.out$nrow)
  names(rank.vec) <- as.character(dt.scores.out$id)
  
  scores.vec <- as.numeric(ag.sorted$scores)
  names(scores.vec) <- as.character(ag.sorted$id)
  
  # run fgsea
  if(mSetObj$paramSet$mumDataContainsPval == 0){
    rank.vec = seq.int(1, length(mSetObj$ec_exp))
    names(rank.vec) <- names(mSetObj$ec_exp)
    scores.vec = seq.int(1, length(mSetObj$ec_exp))
    names(scores.vec) <- names(mSetObj$ec_exp)
  }
  
  fgseaRes <- fgsea2(mSetObj, current.mset, scores.vec, rank.vec, num_perm)

  res.mat <- matrix(0, nrow=length(fgseaRes$pathway), ncol=5)
  
 total_cpds <- names(mSetObj$cpd_exp_dict)
 total.match <- lapply(mSetObj$pathways$cpds, function(x) intersect(x,total_cpds))
 matched.size <- unlist(lapply(total.match, length))
 matched.size <- matched.size[match(fgseaRes$pathway, names(current.mset))]
 
 path.size <- unlist(lapply(mSetObj$pathways$cpds, length))
 path.size <- path.size[match(fgseaRes$pathway, names(current.mset))]
  # create result table
  res.mat[,1] <- path.size
  res.mat[,2] <- matched.size
  res.mat[,3] <- fgseaRes$pval
  res.mat[,4] <- fgseaRes$padj
  res.mat[,5] <- fgseaRes$NES
  
  rownames(res.mat) <- fgseaRes$pathway
  colnames(res.mat) <- c("Pathway_Total", "Hits", "P_val", "P_adj", "NES")
  
  # order by p-values
  ord.inx <- order(res.mat[,3]);
  res.mat <- signif(as.matrix(res.mat[ord.inx, ]), 4);
  
  mSetObj$mummi.gsea.resmat <- res.mat;
  
  EC.Hits <- qs::qread("pathwaysFiltered.qs")
  EC.Hits <- lapply(seq_along(EC.Hits), function(i) paste(names(EC.Hits[[i]]), collapse = ";"))
  res.mat <- cbind(res.mat, EC.Hits)
  fast.write.csv(res.mat, file=mSetObj$mum_nm_csv, row.names=TRUE);
  
  # need to convert ECs to compounds for json
  total_ecpds <- unique(mSetObj$total_matched_ecpds) #all matched compounds
  current.mset <- current.mset[match(rownames(res.mat), mSetObj$pathways$name)]
  ecpds <- lapply(current.mset, function(x) intersect(x, total_ecpds)); #pathways & all ref ecpds
  cpds <- lapply(ecpds, function(x) unique(unlist(mSetObj$ecpd_cpd_dict[match(x, names(mSetObj$ecpd_cpd_dict))])) )
  
  # now make exp vec for all compounds
  cpds2ec <- mSetObj$cpd_ecpd_dict
  cpds.all <- unique(unlist(mSetObj$ecpd_cpd_dict[match(total_ecpds, names(mSetObj$ecpd_cpd_dict))]))
  cpds.exp <- sapply(cpds.all, function(x) sapply(seq_along(x), function(i) mean(mSetObj$ec_exp[match(unique(unlist(cpds2ec[match(x[[i]], names(cpds2ec))])), names(mSetObj$ec_exp))]) ) )
  
  mSetObj$path.nms <- rownames(res.mat)
  mSetObj$path.hits <- convert2JsonList(cpds)
  mSetObj$path.pval <- as.numeric(res.mat[,3])
  
  json.res <- list(cmpd.exp = cpds.exp,
                   path.nms = rownames(res.mat),
                   hits.all = convert2JsonList(cpds),
                   hits.all.size = as.numeric(res.mat[,2]),
                   nes = fgseaRes$NES,
                   fisher.p = as.numeric(res.mat[,3]))
  
  json.mat <- RJSONIO::toJSON(json.res);
  sink(mSetObj$mum_nm);
  cat(json.mat);
  sink();
  
  return(mSetObj);
}

#####################################################################

#'Map currency metabolites to KEGG & BioCyc
#'@description This function maps the user selected list
#'of compounds to its corresponding KEGG IDs and BioCyc IDs
#'@param mSetObj Input the name of the created mSetObj object 
#'@author Jasmine Chong, Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export

PerformCurrencyMapping <- function(mSetObj = NA){
  
  mSetObj <- .get.mSet(mSetObj);
  
  qvec <- mSetObj$dataSet$cmpd;
  curr_db