R/parser-functions.R

Defines functions combine_phenodata parse_neg_results parse_pos_results

Documented in combine_phenodata parse_neg_results parse_pos_results

#' @title CAMERA Parser in positive mode
#'
#' @export
#' @description Parses the CAMERA results using well-defined rules to eliminate
#'   conflicting annotations.
#' @param raw a data frame with variables as columns.  Should contain all output
#'   columns from XCMS and CAMERA, additional columns from \code{match_Annotation} and
#'   \code{calc_minfrac}.  Must contain the CAMERA columns
#'   \code{"isotopes","adduct","pcgroup"}.
#' @param rule a data frame containing the rule list used by CAMERA to annotate
#'   ion adducts and fragments.  Must contain the columns
#'   \code{"name"},\code{"nmol"},\code{"charge"},\code{"massdiff"},\code{"oidscore"},
#'   \code{"quasi"},\code{"ips"}.
#' @param IonMode a character string defining the ionization mode.  Must be
#'   \code{"Positive"}
#' @return data frame parsed version of the original data frame with additional
#'   columns
#'   \code{"mono_mass"},\code{"metabolite_group"},\code{"monoisotopic_flg"},
#'   \code{"adduct_flg"},\code{"isotope_flg"},\code{"ambiguity_flg"},\code{"Selection_flg"}.
#' @examples
#' library(LUMA)
#' if(require(lcmsfishdata, quietly = TRUE)) {
#'
#'   file <- system.file("extdata","primary_adducts_pos.csv", package =
#'   "lcmsfishdata") # is case sensitive on Linux
#'   rules <- read.csv(file, header = TRUE)
#'   Peak.list <- as.data.frame(lcmsfishdata::Peaklist_Pos[["Annotated"]])
#'   test <- parse_pos_results(raw = Peak.list, rule = rules, IonMode = "Positive")
#'   colnames(test)[-which(colnames(test) %in% colnames(Peak.list))] ##Adds the following columns
#'   length(unique(Peak.list[["pcgroup"]])) #Originally were this many metabolite groupings
#'   length(unique(test[["metabolite_group"]])) #Now there are many more metabolite groupings
#'
#' }
parse_pos_results=function(raw,rule,IonMode){
  ##This code is a modified version of CAMERA_parser.m from the Ressom Omics Lab at Georgetown University (http://omics.georgetown.edu/) adapted for the R environment
  ##*******************************************************
  ##*******************POSITIVE   MODE*********************
  ##*******************************************************
  ##Note: the monoisotpic mass means the mass of [M+H]/[M-H] type of ion with the mono isotopes.
  if(IonMode != "Positive") stop("Error: IonMode must be \"Positive\".")
  raw.header <- colnames(raw)
  rule[,"X"] <- as.numeric(row.names(rule))
  ##Move last column to first in rule dataframe
  rule <- rule[,c(8,1,2,3,4,5,6,7)]
  rule.header <- colnames(rule)

  PROTO_MASS <- 1.00727646677

  ##Read isotope, adduct, pcgroup and m/z information
  adducts<-which(raw.header == "adduct")
  isotopes<-which(raw.header == "isotopes")
  mz<-which(raw.header == "mz")
  pcgroup<-which(raw.header == "pcgroup")

  isotopes<-raw[isotopes]
  adducts<-raw[adducts]
  mz<-raw[mz]
  pcgroup<-raw[pcgroup]

  ##Check if there is two lines with same adduct annotation and same pcgroup
  ##Find same annotation in the same pcgroup, both of them are removed
  for (i in 1:nrow(adducts)){
    if(adducts[i,1]!=""){
      idx<-intersect(which(adducts$adduct %in% adducts[i,1]), which(pcgroup$pcgroup %in% pcgroup[i,1]))
      if (length(idx)>1){
        # warning ("Find same annotation in the same pcgroup, both of them are removed: ",as.character(adducts[i,1]),", pcgroup ",pcgroup[i,1])
        adducts[idx,1]<-""
      }
      else if (length(idx)==0){
        stop("fatal error:1")
      }
    }
  }

  ##Alternative code for section above
  # x.temp<-cbind(adducts,pcgroup)
  # y.temp<-x.temp[duplicated(x.temp),]
  # z.temp<-unique(y.temp)
  # for (i in 1:nrow(z.temp)){
  #   if (z.temp$adduct[[i]]!="")
  #   {
  #     a<-which(x.temp$adduct == z.temp$adduct[[i]])
  #     for (k in 1:length(a)){
  #       if (x.temp$pcgroup[[a[k]]]==z.temp$pcgroup[[i]]){
  #         x.temp[a[k],1]<-""
  #       }
  #     }
  #   }
  # }

  ##Prepare a file with cleaned annotation information
  raw_cleaned <- raw
  raw_cleaned[,length(raw)-1] <- adducts
  raw<-raw_cleaned
  pcgroup_no <- sort(unique(raw_cleaned$pcgroup))

  ##Find and split those ions with more than one annotations
  # raw_single_annotation <- matrix(nrow =nrow(raw), ncol = ncol(raw))
  ambiguity_flg <- matrix(nrow = 0, ncol = 1)
  adducts$adduct <- as.character(adducts$adduct)
  for (j in 1:length(adducts$adduct)){
    if (length(grep("\\[.+\\].+\\[.+\\]", toString(adducts$adduct[[j]])))>0){
      # warning ("Conflicting annotations are splitted in pc_group ",pcgroup[j,],": ",as.character(adducts[j,]),":::",j)
      single_annotations <- strsplit(toString(adducts$adduct[[j]]),"(?<=[^-]\\d{1}) ", perl=TRUE)
      # % If the multiple adduct annotation coincide with isotope
      # % annotation, the isotope annotation is only preserved for the first
      # % adduct annotation. Otherwise it will cause error. One possible
      # % solution in the future is to add new isotope group and split
      # % related isotope annotations.
      #! Try to include some code to check if one of the conflicting annotations is present in the
      #! Matched.adduct column
      single_annotations <- unique(single_annotations[[1]])
      score<-1000
      a<-0
      b<-0
      for (i in 1:length(single_annotations)){
        temp <- sapply(strsplit(toString(single_annotations[i])," "), "[", 1)##isolate the adducts
        idx<-which(rule$name %in% temp)
        if (rule$X[idx]<score & rule$ips[idx]>=b) {
          score<- rule$X[idx]
          a<-i
          b<-rule$ips[idx]
        }
      }
      adducts$adduct[j] <- single_annotations[a]
      #     single_line <- raw_cleaned[j,]
      #     single_line[] <- lapply((single_line), as.character)
      #     single_line$adduct[[1]]<-single_annotations[a]
      #     raw_single_annotation<-rbind(raw_single_annotation,single_line[1,])
      ambiguity_flg<-rbind(ambiguity_flg,1)
    }
    else{
      #     raw_single_annotation<-rbind(raw_single_annotation,raw_cleaned[j,])
      ambiguity_flg<-rbind(ambiguity_flg,0)
    }
  }

  raw_single_annotation<-raw_cleaned

  raw_single_annotation[,length(raw)-1] <- adducts

  write.table(raw_single_annotation,file = "duplicate adduct removal for positive.csv",sep = ",",row.names=FALSE)
  # write.table(ambiguity_no,file="ambiguity_no.csv", sep = ",",row.names=FALSE)
  write.table(ambiguity_flg,file="ambiguity_flg.csv", sep = ",",row.names=FALSE)
  raw_single_annotation<-read.table(file = "duplicate adduct removal for positive.csv", sep=",", header = TRUE)
  # ambiguity_no<-read.table(file = "ambiguity_no.csv", sep=",", header = TRUE)
  ambiguity_flg<-read.table(file = "ambiguity_flg.csv", sep=",", header = TRUE)
  file.remove("duplicate adduct removal for positive.csv")
  file.remove("ambiguity_flg.csv")
  ##Re-extract the annotations
  isotopes <- raw_single_annotation[which(colnames(raw_single_annotation)=="isotopes")]
  adducts <- raw_single_annotation[which(colnames(raw_single_annotation)=="adduct")]
  mz <- raw_single_annotation[which(colnames(raw_single_annotation)=="mz")]
  pcgroup <- raw_single_annotation[which(colnames(raw_single_annotation)=="pcgroup")]
  pcgroup <- data.matrix(pcgroup)
  pcgroup_no<-sort(unique(pcgroup))

  ion_info<-matrix(c(0,0,0,0),ncol=4)
  ion_info_copy <- vector(mode = "numeric", length = 0)
  adduct_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  iso_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  mono_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  ion_grp<-matrix(nrow = length(pcgroup), ncol = 1)
  iso_grp<-matrix(nrow = length(pcgroup), ncol = 1)
  mono_mass<-matrix(nrow = length(pcgroup), ncol = 1)

  for (i in 1:length(pcgroup_no)){
    curr_group<-pcgroup_no[i]
    ion_idx<-which(pcgroup %in% curr_group)
    group_iso<-isotopes[ion_idx,]
    group_add<-adducts[ion_idx,]
    group_mz<-as.numeric(mz[ion_idx,])
    ##Parse the adducts for a pcgroup. If it is [M+H], it is a monoisotopic ion. If it is annotated
    ##otherwise, it is an adduct ion. If there is no annotation, it is NOT an adduct ion at least,
    ##but not necessarily a monoisotopic ion. It should be noted if an ion is annotated as an
    ##adduct, only the one of mono isotopes is annotated. Although the adduct ion can have its
    ##set of isotopic ions. That's the reason why adducts are first processed here.
    for (j in 1:length(group_add)){
      if (group_add[j]!=""){
        curr_rule <- sapply(strsplit(toString(group_add[j])," "), "[", 1)##isolate the adducts
        rule_idx<-which(rule$name %in% curr_rule)
        ##mass_tmp is the equivalent m/z of [M+H] or [M-H] type of ion
        if(length(rule_idx)==0){
          stop("NO match found for adduct pattern ", as.character(curr_rule)," in rule list")
        }
        nmol<-rule[rule_idx,3]
        z<-rule[rule_idx,4]
        massdiff<-rule[rule_idx,5]
        mass_tmp<- (group_mz[j]*z - massdiff)/nmol +PROTO_MASS
        mass_signature<-as.numeric(sapply(strsplit(toString(group_add[j])," "), "[", 2))
        ##[M+H]or[M-H]
        if (rule_idx==1){
          mono_flg[ion_idx[j]]<-1
          mono_mass[ion_idx[j]]<-group_mz[j]
          adduct_flg[ion_idx[j]]<-0
        }
        else{
          adduct_flg[ion_idx[j]]<-1
          mono_flg[ion_idx[j]]<-0
        }
        ##If the ion has the same monoisotopic value and pcgroup as a previos ion, they are
        ##assigned to the same ion group with same monoisotopic mass. If not, a new ion_info
        ##entry is created
        if(any((mass_signature==ion_info[,4])&(curr_group==ion_info[,3]))){
          ion_grp_idx<-intersect(which(ion_info[,4] %in% mass_signature),which(ion_info[,3] %in% curr_group))
          ion_grp[ion_idx[j]]<-ion_info[ion_grp_idx,1]
        }
        else{
          ion_grp[ion_idx[j]]<-max(ion_info[,1])+1
          ion_info_temp <-cbind(max(ion_info[,1])+1,mass_tmp,curr_group,mass_signature)
          ion_info<-rbind(ion_info,ion_info_temp)
        }
      }
      else{
        adduct_flg[ion_idx[j]]<-0
      }
    }
    # %% Parse the isotopes of a pcgroup. If an ion is [M]+ and not an adduct of
    # %% the other ion, it is a monoisotopic ion, otherwise it is an adduct ion
    # %% with monoisotopic mass (but not considered as monoisotopic ion here). If
    # %% an ion is multiple-charged [M]n+(it is labeled under "isotopes" rather than
    # %% "adduct"), its monoisotopic mass with single charge is calculated as
    # %% (n*m/z - (n-1)*mass-of-Proton). If an ion is labeled but neither [M]+ nor
    # %% [M]n+, it is an isotopic ion. If an ion is not labeled, it is not an
    # %% isotopic ion.
    for (j in 1:length(group_iso)){
      if(group_iso[j]!=""){
        ## isolate the isotope group number
        iso_grp[ion_idx[j]]<-sapply(strsplit(toString(group_iso[j]),"\\[|\\]"), "[", 2)
        ## extract the isotopic information
        curr_iso<-sapply(strsplit(toString(group_iso[j]),"\\[\\d+\\]"), "[", 2)
        if(curr_iso=='[M]+'){
          if(adduct_flg[ion_idx[j]]==0){
            mono_flg[ion_idx[j]]<-1
            mono_mass[ion_idx[j]]<-group_mz[j]
            iso_flg[ion_idx[j]]<-0
          }
          else{
            mono_flg[ion_idx[j]]<-0
            iso_flg[ion_idx[j]]<-0
          }
        }
        ## if an ion is multiple-charged [M]n+
        else if(length(grep(".+\\[\\M\\]\\d\\+",toString(group_iso[j])))>0){
          charge_state<-as.numeric(sapply(strsplit(toString(group_iso[j]),"\\M\\]|\\+"), "[", 2))
          mono_flg[ion_idx[j]]<-0
          if(adduct_flg[ion_idx[j]]!=1){
            # new mono_mass is assigned only when this ion has no adduct annotation,
            # otherwise the mono_mass is calculated based on adduct annotation to deal
            # with clusterion (such as [2M+2H]2+)
            mono_mass[ion_idx[j]]<-charge_state*group_mz[j]-(charge_state-1)*PROTO_MASS
          }
          iso_flg[ion_idx[j]]<-0
        }
        else {
          iso_flg[ion_idx[j]]<-1
          mono_flg[ion_idx[j]]<-0
        }
      }
      else{
        iso_flg[ion_idx[j]]<-0
      }
    }
  }

  # %% For ions which are neither adducts nor isotopes and has not been assigned
  # %% a monoisotopic m/z yet, they are assumed to be monoisotopic and the
  # %% monoisotopic m/z are assigned.
  for (i in 1:length(pcgroup)){
    if (adduct_flg[i]==0 && iso_flg[i]==0 && is.na(mono_mass[i])){
      mono_flg[i]<-1
      mono_mass[i]<-mz$mz[i]
    }
  }
  ion_info<-ion_info[2:nrow(ion_info),]
  # %% If multiple ions are in the same ion group, they are adducts for each
  # %% other and share the same monoisotopic mass. If one of them is previously
  # %% assigned a mono isotopic mass, use the assigned one. If none of them has
  # %% monoisotopic mass (e.g. they are [M+Na] and [M+K]), use the one
  # %% calculated by CAMERA and stored in ion_info.
  temp_idx <- 1
  for (k in 1:nrow(ion_info)){
    #! if(is.matrix(ion_info_copy)){
    #!   ion_info_copy <- ion_info_loop
    #! }
    grp_ion_idx<-which(ion_grp %in% ion_info[k,1])
    grp_mass<-mono_mass[grp_ion_idx]
    if (sum(!is.na(grp_mass))==1){
      mono_mass[grp_ion_idx]<-grp_mass[!is.na(grp_mass)]
    }
    else if ((sum(!is.na(grp_mass))>1)&&(sum(mono_flg[grp_ion_idx])==1)){
      # % If there are both [M+2H]2+ and [M]2+ type of annotation, that
      # % means there are both adducts and isotopes of this ion. In
      # % this case, there could be two valid monoisotopic mass from
      # % previous calculation. One from [M+2H] and the other from
      # % [M+H]([M+Na] will not give a mono mass). In this case, we use
      # % the monoisotopic m/z from [M+H]+
      mono_mass[grp_ion_idx]<-grp_mass[mono_flg[grp_ion_idx]==1]
    }
    else if (sum(!is.na(grp_mass))>=2){
      # warning("More than one mono mass value for adducts. \nRemoving conflicting adduct annotation.")
      # print(raw_single_annotation[grp_ion_idx,(ncol(raw_single_annotation)-2):ncol(raw_single_annotation)])
      if(is.vector(ion_info_copy)){
        ion_info_copy <- ion_info[-k,]
        for (l in 1:length(grp_ion_idx)){
          mass_tmp <- raw_single_annotation[grp_ion_idx[l],"mz"]
          mass_signature <- mass_tmp + PROTO_MASS
          curr_group <- raw_single_annotation[grp_ion_idx[l],"pcgroup"]
          ion_info_temp <-cbind(max(ion_info[,1])+temp_idx,mass_tmp,curr_group,mass_signature)
          ion_grp[grp_ion_idx[l]] <- ion_info_temp[,1]
          ion_info_copy <- rbind(ion_info_copy,ion_info_temp)
          temp_idx <- temp_idx + 1
        }
      } else {
        if(nrow(ion_info_copy) >= nrow(ion_info) +1){
          sub.idx <- nrow(ion_info_copy) - nrow(ion_info)
          ion_info_loop <- ion_info_copy[-(k-sub.idx),]
          for (l in 1:length(grp_ion_idx)){
            mass_tmp <- raw_single_annotation[grp_ion_idx[l],"mz"]
            mass_signature <- mass_tmp - PROTO_MASS
            curr_group <- raw_single_annotation[grp_ion_idx[l],"pcgroup"]
            ion_info_temp <-cbind(max(ion_info[,1])+temp_idx,mass_tmp,curr_group,mass_signature)
            ion_grp[grp_ion_idx[l]] <- ion_info_temp[,1]
            ion_info_loop <- rbind(ion_info_loop,ion_info_temp)
            temp_idx <- temp_idx + 1
          }
          ion_info_copy <- ion_info_loop
        }
      }
      # stop("More than one mono mass value for adducts")
    }
    else{
      mono_mass[grp_ion_idx] = ion_info[k, 2]
    }
  }
  # %% If multiple ions are in the same isotope group, they are isotopes for
  # %% each other and have the same monoisotopic mass. For one isotopic ions to
  # %% appear, its monoisotopic ion must present.

  iso_grp_no<- unique(iso_grp[!is.na(iso_grp)])

  for (k in 1:length(iso_grp_no)){
    iso_grp_idx<-which(iso_grp %in% iso_grp_no[k])
    iso_grp_mass<-mono_mass[iso_grp_idx]
    if(sum(!is.na(iso_grp_mass))==1){
      mono_mass[iso_grp_idx]<-iso_grp_mass[!is.na(iso_grp_mass)]
    }else if(sum(!is.na(iso_grp_mass))>1) {
      #stop("more than one mono mass value for isotopes for isotope group ",iso_grp_no[k])
    }else {
      #stop("there is no monoisotopic ion found for isotope group ", iso_grp_no[k])
    }
  }

  ##save(pcgroup,iso_grp_no,iso_grp_no,pcgroup_no,group_add,group_iso,adduct_flg,ambiguity_flg,iso_flg,isotopes,mono_flg,mono_mass,ion_info,mz,pcgroup,raw_single_annotation,ion_grp,iso_grp,file="Temporary")
  ##load(file="Temporary")

  ## Group the ions into metabolites
  label_flg<-matrix(0L,nrow = length(pcgroup), ncol = 1)
  metabolite_grp<-matrix(0L,nrow = length(pcgroup), ncol = 1)
  metabolite_idx<-1

  ## The adducts are first grouped into metabolites
  if(is.matrix(ion_info_copy)) {
    for (k in 1:nrow(ion_info_copy)){
      ion_grp_idx<-which(ion_grp %in% ion_info_copy[k,1])
      if(length(ion_grp_idx)>0){
        label_flg[ion_grp_idx]<-1
        metabolite_grp[ion_grp_idx]<-metabolite_idx
        metabolite_idx<-metabolite_idx+1
      }
      else{
        warning("Error: ion group does not exist for ion group ", ion_info_copy[k, 1])
      }
    }
  } else {
    for (k in 1:nrow(ion_info)){
      ion_grp_idx<-which(ion_grp %in% ion_info[k,1])
      if(length(ion_grp_idx)>0){
        label_flg[ion_grp_idx]<-1
        metabolite_grp[ion_grp_idx]<-metabolite_idx
        metabolite_idx<-metabolite_idx+1
      }
      else{
        warning("Error: ion group does not exist for ion group ", ion_info[k, 1])
      }
    }}

  ## The isotopes are then grouped into metabolites. If a metabolite number
  ## already exists from the adducts of the isotopics ions, use the existing
  ## one. Otherwise, use a new one.

  for (k in 1:length(iso_grp_no)) {
    iso_grp_idx<-which(iso_grp %in% iso_grp_no[k])
    curr_metabolite_grp_no<-metabolite_grp[iso_grp_idx]
    if(any(curr_metabolite_grp_no>0)){
      if (sum(curr_metabolite_grp_no>0)==1){
        metabolite_grp[iso_grp_idx]<-curr_metabolite_grp_no[curr_metabolite_grp_no>0]
        label_flg[iso_grp_idx]<-1
      }
      else {
        stop("more than one metabolite group number for isotopes")
      }
    }
    else {
      metabolite_grp[iso_grp_idx]<-metabolite_idx
      label_flg[iso_grp_idx]<-1
      metabolite_idx<-metabolite_idx+1
    }
  }

  ## For the remaining ions, assign a metabolite number
  metabolite_grp[!label_flg] <- metabolite_idx:(metabolite_idx + sum(!label_flg)-1)
  uni_metabolite <- unique(metabolite_grp)
  mono_selector = matrix(0L,nrow = length(mono_flg), ncol = 1)
  for (i in 1:length(uni_metabolite)){
    idx <- which(metabolite_grp %in% uni_metabolite[i])
    idx_mono <- which(mono_flg[idx]>0)
    if (length(idx_mono)==1){
      mono_selector[idx[idx_mono]]<-1
    }
    else if(length(idx_mono)>=2){
      #stop("more than one monoisotopic peak")
    }
    else{
      mono_selector[idx[1]]<-1;
    }
  }

  ## Change mono mz value to neutral monoisotopic mass
  mono_mass <- mono_mass - PROTO_MASS
  ## Output the parsed results into a csv file
  Peak.list<-raw_single_annotation
  # attributes(Peak.list)
  Peak.list <- cbind(Peak.list,mono_mass,metabolite_grp,mono_flg,adduct_flg,iso_flg,ambiguity_flg,mono_selector)
  colnames(Peak.list)[(ncol(raw_single_annotation)+1):(ncol(raw_single_annotation)+7)]<-cbind("mono_mass","metabolite_group","monoisotopic_flg","adduct_flg","isotope_flg","ambiguity_flg","Selection_flg")

  return(Peak.list)
}

#' @title CAMERA Parser in negative mode
#'
#' @export
#' @description Parses the CAMERA results using well-defined rules to eliminate
#'   conflicting annotations.
#' @param raw a data frame with variables as columns.  Should contain all output
#'   columns from XCMS and CAMERA, additional columns from \code{match_Annotation} and
#'   \code{calc_minfrac}.  Must contain the CAMERA columns
#'   \code{"isotopes","adduct","pcgroup"}.
#' @param rule a data frame containing the rule list used by CAMERA to annotate
#'   ion adducts and fragments.  Must contain the columns
#'   \code{"name"},\code{"nmol"},\code{"charge"},\code{"massdiff"},\code{"oidscore"},
#'   \code{"quasi"},\code{"ips"}.
#' @param IonMode a character string defining the ionization mode.  Must be
#'   \code{"Negative"}
#' @return data frame parsed version of the original data frame with additional
#'   columns
#'   \code{"mono_mass"},\code{"metabolite_group"},\code{"monoisotopic_flg"},
#'   \code{"adduct_flg"},\code{"isotope_flg"},\code{"ambiguity_flg"},\code{"Selection_flg"}.
#' @examples
#' library(LUMA)
#' if(require(lcmsfishdata, quietly = TRUE)) {
#'
#'   file <- system.file("extdata","primary_adducts_neg.csv", package =
#'   "lcmsfishdata") # is case sensitive on Linux
#'   rules <- read.csv(file, header = TRUE)
#'   Peak.list <- as.data.frame(lcmsfishdata::Peaklist_Neg[["Annotated"]])
#'   test <- parse_neg_results(raw = Peak.list, rule = rules, IonMode = "Negative")
#'   colnames(test)[-which(colnames(test) %in% colnames(Peak.list))] ##Adds the following columns
#'   length(unique(Peak.list[["pcgroup"]])) #Originally were this many metabolite groupings
#'   length(unique(test[["metabolite_group"]])) #Now there are many more metabolite groupings
#'
#' }
parse_neg_results=function(raw,rule,IonMode){
  ##This code is a modified version of CAMERA_parser.m from the Ressom Omics Lab at Georgetown University (http://omics.georgetown.edu/) adapted for the R environment
  ##*******************************************************
  ##*******************NEGATIVE   MODE*********************
  ##*******************************************************
  ##Note: the monoisotpic mass means the mass of [M+H]/[M-H] type of ion with the mono isotopes.
  if(IonMode != "Negative") stop("Error: IonMode must be \"Negative\".")

  ## Input CAMERA output and Negitive Rule file
  raw.header <- colnames(raw)
  rule[,"X"] <- as.numeric(row.names(rule))
  ##Move last column to first in rule dataframe
  rule <- rule[,c(8,1,2,3,4,5,6,7)]
  rule.header <- colnames(rule)
  PROTO_MASS <- 1.00727646677

  ##Read isotope, adduct, pcgroup and m/z information
  adducts<-which(raw.header == "adduct")
  isotopes<-which(raw.header == "isotopes")
  mz<-which(raw.header == "mz")
  pcgroup<-which(raw.header == "pcgroup")

  isotopes<-raw[isotopes]
  adducts<-raw[adducts]
  mz<-raw[mz]
  pcgroup<-raw[pcgroup]

  ##Check if there is two lines with same adduct annotation and same pcgroup
  ##Find same annotation in the same pcgroup, both of them are removed
  for (i in 1:nrow(adducts)){
    if(adducts[i,1]!=""){
      idx<-intersect(which(adducts$adduct %in% adducts[i,1]), which(pcgroup$pcgroup %in% pcgroup[i,1]))
      if (length(idx)>1){
        # warning ("Find same annotation in the same pcgroup, both of them are removed: ",as.character(adducts[i,1]),", pcgroup ",pcgroup[i,1])
        adducts[idx,1]<-""
      }
      else if (length(idx)==0){
        stop("fatal error:1")
      }
    }
  }

  ##Alternative code for section above
  # x.temp<-cbind(adducts,pcgroup)
  # y.temp<-x.temp[duplicated(x.temp),]
  # z.temp<-unique(y.temp)
  # for (i in 1:nrow(z.temp)){
  #   if (z.temp$adduct[[i]]!="")
  #   {
  #     a<-which(x.temp$adduct == z.temp$adduct[[i]])
  #     for (k in 1:length(a)){
  #       if (x.temp$pcgroup[[a[k]]]==z.temp$pcgroup[[i]]){
  #         x.temp[a[k],1]<-""
  #       }
  #     }
  #   }
  # }

  ##Prepare a file with cleaned annotation information
  raw_cleaned <- raw
  raw_cleaned[,length(raw)-1] <- adducts
  raw<-raw_cleaned
  pcgroup_no <- sort(unique(raw_cleaned$pcgroup))

  ##Find and split those ions with more than one annotations
  # raw_single_annotation <- matrix(nrow =nrow(raw), ncol = ncol(raw))
  ambiguity_flg <- matrix(nrow = 0, ncol = 1)
  adducts$adduct <- as.character(adducts$adduct)
  for (j in 1:length(adducts$adduct)){
    if (length(grep("\\[.+\\].+\\[.+\\]", toString(adducts$adduct[[j]])))>0){
      # warning ("Conflicting annotations are splitted in pc_group ",pcgroup[j,],": ",as.character(adducts[j,]),":::",j)
      single_annotations <- strsplit(toString(adducts$adduct[[j]]),"(?<=[^-]\\d{1}) ", perl=TRUE)
      # % If the multiple adduct annotation coincide with isotope
      # % annotation, the isotope annotation is only preserved for the first
      # % adduct annotation. Otherwise it will cause error. One Negsible
      # % solution in the future is to add new isotope group and split
      # % related isotope annotations.
      single_annotations <- unique(single_annotations[[1]])
      score<-1000
      a<-0
      b<-0
      for (i in 1:length(single_annotations)){
        temp <- sapply(strsplit(toString(single_annotations[i])," "), "[", 1)##isolate the adducts
        idx<-which(rule$name %in% temp)
        if (rule$X[idx]<score & rule$ips[idx]>=b) {
          score<- rule$X[idx]
          a<-i
          b<-rule$ips[idx]
        }
      }
      adducts$adduct[j] <- single_annotations[a]
      #     single_line <- raw_cleaned[j,]
      #     single_line[] <- lapply((single_line), as.character)
      #     single_line$adduct[[1]]<-single_annotations[a]
      #     raw_single_annotation<-rbind(raw_single_annotation,single_line[1,])
      ambiguity_flg<-rbind(ambiguity_flg,1)
    }
    else{
      #     raw_single_annotation<-rbind(raw_single_annotation,raw_cleaned[j,])
      ambiguity_flg<-rbind(ambiguity_flg,0)
    }
  }

  raw_single_annotation<-raw_cleaned

  raw_single_annotation[,length(raw)-1] <- adducts


  row.names(raw_single_annotation)<-1:nrow(raw_single_annotation)
  write.table(raw_single_annotation,file = "duplicate adduct removal for Negitive.csv",sep = ",",row.names=FALSE)
  #write.table(ambiguity_no,file="ambiguity_no.csv", sep = ",",row.names=FALSE)
  write.table(ambiguity_flg,file="ambiguity_flg.csv", sep = ",",row.names=FALSE)
  raw_single_annotation<-read.table(file = "duplicate adduct removal for Negitive.csv", sep=",", header = TRUE)
  #ambiguity_no<-read.table(file = "ambiguity_no.csv", sep=",", header = TRUE)
  ambiguity_flg<-read.table(file = "ambiguity_flg.csv", sep=",", header = TRUE)
  file.remove("duplicate adduct removal for Negitive.csv")
  file.remove("ambiguity_flg.csv")
  ##Re-extract the annotations
  isotopes <- raw_single_annotation[which(colnames(raw_single_annotation)=="isotopes")]
  adducts <- raw_single_annotation[which(colnames(raw_single_annotation)=="adduct")]
  mz <- raw_single_annotation[which(colnames(raw_single_annotation)=="mz")]
  pcgroup <- raw_single_annotation[which(colnames(raw_single_annotation)=="pcgroup")]
  pcgroup <- data.matrix(pcgroup)
  pcgroup_no<-sort(unique(pcgroup))

  ion_info<-matrix(c(0,0,0,0),ncol=4)
  ion_info_copy <- vector(mode = "numeric", length = 0)
  adduct_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  iso_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  mono_flg<-matrix(nrow = length(pcgroup), ncol = 1)
  ion_grp<-matrix(nrow = length(pcgroup), ncol = 1)
  iso_grp<-matrix(nrow = length(pcgroup), ncol = 1)
  mono_mass<-matrix(nrow = length(pcgroup), ncol = 1)

  for (i in 1:length(pcgroup_no)){
    curr_group<-pcgroup_no[i]
    ion_idx<-which(pcgroup %in% curr_group)
    group_iso<-isotopes[ion_idx,]
    group_add<-adducts[ion_idx,]
    group_mz<-as.numeric(mz[ion_idx,])
    ##Parse the adducts for a pcgroup. If it is [M+H], it is a monoisotopic ion. If it is annotated
    ##otherwise, it is an adduct ion. If there is no annotation, it is NOT an adduct ion at least,
    ##but not necessarily a monoisotopic ion. It should be noted if an ion is annotated as an
    ##adduct, only the one of mono isotopes is annotated. Although the adduct ion can have its
    ##set of isotopic ions. That's the reason why adducts are first processed here.
    for (j in 1:length(group_add)){
      if (group_add[j]!=""){
        curr_rule <- sapply(strsplit(toString(group_add[j])," "), "[", 1)##isolate the adducts
        rule_idx<-which(rule$name %in% curr_rule)
        ##mass_tmp is the equivalent m/z of [M+H] or [M-H] type of ion
        if(length(rule_idx)==0){
          stop("NO match found for adduct pattern ", as.character(curr_rule)," in rule list")
        }
        nmol<-rule[rule_idx,3]
        z<-rule[rule_idx,4]
        massdiff<-rule[rule_idx,5]
        mass_tmp<- (group_mz[j]*-z - massdiff)/nmol -PROTO_MASS
        mass_signature<-as.numeric(sapply(strsplit(toString(group_add[j])," "), "[", 2))
        ##[M+H]or[M-H]
        if (rule_idx==1){
          mono_flg[ion_idx[j]]<-1
          mono_mass[ion_idx[j]]<-group_mz[j]
          adduct_flg[ion_idx[j]]<-0
        }
        else{
          adduct_flg[ion_idx[j]]<-1
          mono_flg[ion_idx[j]]<-0
        }
        ##If the ion has the same monoisotopic value and pcgroup as a previos ion, they are
        ##assigned to the same ion group with same monoisotopic mass. If not, a new ion_info
        ##entry is created
        if(any((mass_signature==ion_info[,4])&(curr_group==ion_info[,3]))){
          ion_grp_idx<-intersect(which(ion_info[,4] %in% mass_signature),which(ion_info[,3] %in% curr_group))
          ion_grp[ion_idx[j]]<-ion_info[ion_grp_idx,1]
        }
        else{
          ion_grp[ion_idx[j]]<-max(ion_info[,1])+1
          ion_info_temp <-cbind(max(ion_info[,1])+1,mass_tmp,curr_group,mass_signature)
          ion_info<-rbind(ion_info,ion_info_temp)
        }
      }
      else{
        adduct_flg[ion_idx[j]]<-0
      }
    }
    # %% Parse the isotopes of a pcgroup. If an ion is [M]+ and not an adduct of
    # %% the other ion, it is a monoisotopic ion, otherwise it is an adduct ion
    # %% with monoisotopic mass (but not considered as monoisotopic ion here). If
    # %% an ion is multiple-charged [M]n+(it is labeled under "isotopes" rather than
    # %% "adduct"), its monoisotopic mass with single charge is calculated as
    # %% (n*m/z - (n-1)*mass-of-Proton). If an ion is labeled but neither [M]+ nor
    # %% [M]n+, it is an isotopic ion. If an ion is not labeled, it is not an
    # %% isotopic ion.
    for (j in 1:length(group_iso)){
      if(group_iso[j]!=""){
        ## isolate the isotope group number
        iso_grp[ion_idx[j]]<-sapply(strsplit(toString(group_iso[j]),"\\[|\\]"), "[", 2)
        ## extract the isotopic information
        curr_iso<-sapply(strsplit(toString(group_iso[j]),"\\[\\d+\\]"), "[", 2)
        if(curr_iso=='[M]-'){
          if(adduct_flg[ion_idx[j]]==0){
            mono_flg[ion_idx[j]]<-1
            mono_mass[ion_idx[j]]<-group_mz[j]
            iso_flg[ion_idx[j]]<-0
          }
          else{
            mono_flg[ion_idx[j]]<-0
            iso_flg[ion_idx[j]]<-0
          }
        }
        ## if an ion is multiple-charged [M]n-
        else if(length(grep(".+\\[\\M\\]\\d\\-",toString(group_iso[j])))>0){
          charge_state<-as.numeric(sapply(strsplit(toString(group_iso[j]),"\\M\\]|\\-"), "[", 2))
          mono_flg[ion_idx[j]]<-0
          if(adduct_flg[ion_idx[j]]!=1){
            # new mono_mass is assigned only when this ion has no adduct annotation,
            # otherwise the mono_mass is calculated based on adduct annotation to deal
            # with clusterion (such as [2M+2H]2+)
            mono_mass[ion_idx[j]]<-charge_state*group_mz[j]+(charge_state-1)*PROTO_MASS
          }
          iso_flg[ion_idx[j]]<-0
        }
        else {
          iso_flg[ion_idx[j]]<-1
          mono_flg[ion_idx[j]]<-0
        }
      } else{
        iso_flg[ion_idx[j]]<-0
      }
    }
  }

  # %% For ions which are neither adducts nor isotopes and has not been assigned
  # %% a monoisotopic m/z yet, they are assumed to be monoisotopic and the
  # %% monoisotopic m/z are assigned.
  for (i in 1:length(pcgroup)){
    if (adduct_flg[i]==0 && iso_flg[i]==0 && is.na(mono_mass[i])){
      mono_flg[i]<-1
      mono_mass[i]<-mz$mz[i]
    }
  }
  ion_info<-ion_info[2:nrow(ion_info),]
  # %% If multiple ions are in the same ion group, they are adducts for each
  # %% other and share the same monoisotopic mass. If one of them is previously
  # %% assigned a mono isotopic mass, use the assigned one. If none of them has
  # %% monoisotopic mass (e.g. they are [M+Na] and [M+K]), use the one
  # %% calculated by CAMERA and stored in ion_info.
  temp_idx <- 1
  for (k in 1:nrow(ion_info)){
    grp_ion_idx<-which(ion_grp %in% ion_info[k,1])
    grp_mass<-mono_mass[grp_ion_idx]
    if (sum(!is.na(grp_mass))==1){
      mono_mass[grp_ion_idx]<-grp_mass[!is.na(grp_mass)]
    }
    else if ((sum(!is.na(grp_mass))>1)&&(sum(mono_flg[grp_ion_idx])==1)){
      # % If there are both [M+2H]2+ and [M]2+ type of annotation, that
      # % means there are both adducts and isotopes of this ion. In
      # % this case, there could be two valid monoisotopic mass from
      # % previous calculation. One from [M+2H] and the other from
      # % [M+H]([M+Na] will not give a mono mass). In this case, we use
      # % the monoisotopic m/z from [M+H]+
      mono_mass[grp_ion_idx]<-grp_mass[mono_flg[grp_ion_idx]==1]
    }
    else if (sum(!is.na(grp_mass))>=2){
      # % This seems to arise from CAMERA annotating two different ions with different
      # % m/z values as the same isotopic mass, presumably because they are close in mass.
      # % In this case, we don't use either monoisotopic m/z, but assume both are monoisotopic
      # % ions and recalculate them from the original m/z values.
      # warning("More than one mono mass value for adducts. \nRemoving conflicting adduct annotation.")
      # print(raw_single_annotation[grp_ion_idx,(ncol(raw_single_annotation)-2):ncol(raw_single_annotation)])
      ion_info_copy <- ion_info[-k,]
      for (l in 1:length(grp_ion_idx)){
        mass_tmp <- raw_single_annotation[grp_ion_idx[l],"mz"]
        mass_signature <- mass_tmp + PROTO_MASS
        curr_group <- raw_single_annotation[grp_ion_idx[l],"pcgroup"]
        ion_info_temp <-cbind(max(ion_info[,1])+temp_idx,mass_tmp,curr_group,mass_signature)
        ion_grp[grp_ion_idx[l]] <- ion_info_temp[,1]
        ion_info_copy <- rbind(ion_info_copy,ion_info_temp)
        temp_idx <- temp_idx + 1
      }
      # stop("More than one mono mass value for adducts")
    }
    else{
      mono_mass[grp_ion_idx] = ion_info[k, 2]
    }
  }
  # %% If multiple ions are in the same isotope group, they are isotopes for
  # %% each other and have the same monoisotopic mass. For one isotopic ions to
  # %% appear, its monoisotopic ion must present.

  iso_grp_no<- unique(iso_grp[!is.na(iso_grp)])

  for (k in 1:length(iso_grp_no)){
    iso_grp_idx<-which(iso_grp %in% iso_grp_no[k])
    iso_grp_mass<-mono_mass[iso_grp_idx]
    if(sum(!is.na(iso_grp_mass))==1){
      mono_mass[iso_grp_idx]<-iso_grp_mass[!is.na(iso_grp_mass)]
    }else if(sum(!is.na(iso_grp_mass))>1) {
      warning("Error: more than one mono mass value for isotopes for isotope group ",iso_grp_no[k])
    }else {
      warning("Error: there is no monoisotopic ion found for isotope group ", iso_grp_no[k])
    }
  }

  ##save(pcgroup,iso_grp_no,iso_grp_no,pcgroup_no,group_add,group_iso,adduct_flg,ambiguity_flg,iso_flg,isotopes,mono_flg,mono_mass,ion_info,mz,pcgroup,raw_single_annotation,ion_grp,iso_grp,file="Temporary")
  ##load(file="Temporary")

  ## Group the ions into metabolites
  label_flg<-matrix(0L,nrow = length(pcgroup), ncol = 1)
  metabolite_grp<-matrix(0L,nrow = length(pcgroup), ncol = 1)
  metabolite_idx<-1

  ## The adducts are first grouped into metabolites
  if(is.matrix(ion_info_copy)) {
    for (k in 1:nrow(ion_info_copy)){
      ion_grp_idx<-which(ion_grp %in% ion_info_copy[k,1])
      if(length(ion_grp_idx)>0){
        label_flg[ion_grp_idx]<-1
        metabolite_grp[ion_grp_idx]<-metabolite_idx
        metabolite_idx<-metabolite_idx+1
      }
      else{
        warning("Error: ion group does not exist for ion group ", ion_info_copy[k, 1])
      }
    }
  } else {
    for (k in 1:nrow(ion_info)){
      ion_grp_idx<-which(ion_grp %in% ion_info[k,1])
      if(length(ion_grp_idx)>0){
        label_flg[ion_grp_idx]<-1
        metabolite_grp[ion_grp_idx]<-metabolite_idx
        metabolite_idx<-metabolite_idx+1
      }
      else{
        warning("Error: ion group does not exist for ion group ", ion_info[k, 1])
      }
    }}


  ## The isotopes are then grouped into metabolites. If a metabolite number
  ## already exists from the adducts of the isotopics ions, use the existing
  ## one. Otherwise, use a new one.

  for (k in 1:length(iso_grp_no)) {
    iso_grp_idx<-which(iso_grp %in% iso_grp_no[k])
    curr_metabolite_grp_no<-metabolite_grp[iso_grp_idx]
    if(any(curr_metabolite_grp_no>0)){
      if (sum(curr_metabolite_grp_no>0)==1){
        metabolite_grp[iso_grp_idx]<-curr_metabolite_grp_no[curr_metabolite_grp_no>0]
        label_flg[iso_grp_idx]<-1
      }
      else {
        stop("Error: more than one metabolite group number for isotopes")
      }
    }
    else {
      metabolite_grp[iso_grp_idx]<-metabolite_idx
      label_flg[iso_grp_idx]<-1
      metabolite_idx<-metabolite_idx+1
    }
  }

  ## For the remaining ions, assign a metabolite number
  metabolite_grp[!label_flg] <- metabolite_idx:(metabolite_idx + sum(!label_flg)-1)
  uni_metabolite <- unique(metabolite_grp)
  mono_selector = matrix(0L,nrow = length(mono_flg), ncol = 1)
  for (i in 1:length(uni_metabolite)){
    idx <- which(metabolite_grp %in% uni_metabolite[i])
    idx_mono <- which(mono_flg[idx]>0)
    if (length(idx_mono)==1){
      mono_selector[idx[idx_mono]]<-1
    }
    else if(length(idx_mono)>=2){
      # print(raw_single_annotation[idx,(ncol(raw_single_annotation)-2):ncol(raw_single_annotation)])
      #stop("Error: more than one monoisotopic peak")
    }
    else{
      mono_selector[idx[1]]<-1;
    }
  }

  ## Change mono mz value to neutral monoisotopic mass
  mono_mass <- mono_mass + PROTO_MASS
  ## Output the parsed results into a csv file
  Peak.list<-raw_single_annotation
  # attributes(Peak.list)
  Peak.list <- cbind(Peak.list,mono_mass,metabolite_grp,mono_flg,adduct_flg,iso_flg,ambiguity_flg,mono_selector)
  colnames(Peak.list)[(ncol(raw_single_annotation)+1):(ncol(raw_single_annotation)+7)]<-cbind("mono_mass","metabolite_group","monoisotopic_flg","adduct_flg","isotope_flg","ambiguity_flg","Selection_flg")

  return(Peak.list)
  }

#' @title Combine phenotype data in Peak.list
#'
#' @export
#' @description Combine phenotype data for each metabolite group in Peak.list
#'   with summed intensity values
#' @param Sample.df a data frame with class info as columns.  Must contain a
#'   separate row entry for each unique sex/class combination. Must contain the
#'   columns \code{"Sex","Class","n","Endogenous"}.
#' @param Peak.list data frame. Must have the columns \code{"Correlation.stat",
#'   "metabolite_group", "mono_mass"}.  Should contain output columns from
#'   \code{XCMS} and ]code{CAMERA}, and additional columns from functions
#'   \code{match_Annotation()}, \code{calc_minfrac()}, \code{CAMERAParser()},
#'   \code{plot_metgroup()}.
#' @param Summed.list data frame containing metabolite group as first column and
#'   the rest summed intensities for each sample
#' @param search.par a single-row data frame with 11 variables containing
#'   user-defined search parameters. Must contain the columns
#'   \code{"ppm"},\code{"rt"},\code{"Voidrt"},\code{"Corr.stat.pos"},\code{"Corr.stat.neg"},
#'    \code{"CV"},\code{"Minfrac"}, \code{"Endogenous"},
#'   \code{"Solvent"},\code{"gen.plots"}, \code{"keep.singletons"}.
#' @param QC.id character vector specifying identifier in filename designating a
#'   Pooled QC sample.  Only the first value will be used.  Default is
#'   \code{"Pooled_QC_"}
#' @param BLANK a logical indicating whether blanks are being evaluated
#' @param IonMode a character string defining the ionization mode.  Must be one
#'   of \code{c("Positive","Negative")}.
#' @return Peak.list.summed with the munged phenotype columns up front followed
#'   by QC and sample columns
#' @importFrom plyr ddply
#' @importFrom dplyr '%>%' mutate_if summarise bind_cols
#' @importFrom utils str txtProgressBar setTxtProgressBar
#' @importFrom stringr str_count
#' @examples
#' library(LUMA)
#' file <- system.file('extdata','Search_Parameters.txt', package = "LUMA") # is
#' # case sensitive on Linux
#' search.par <- read.table(file, sep = "\t", header = TRUE) #Ignore Warning message
#' file2 <- system.file('extdata','Sample_Class.txt', package = "LUMA") # is
#' # case sensitive on Linux
#' Sample.df <- read.table(file2, sep = "\t", header = TRUE) #Ignore Warning message
#' Peak.list <- LUMA::Peaklist_Pos$output_parsed
#' Summed.list <- sum_features(Peak.list = Peak.list, Sample.df = Sample.df ,
#'                        search.par = search.par, BLANK = FALSE, IonMode = "Positive")
#' test <- combine_phenodata(Sample.df = Sample.df, Peak.list = Peak.list,
#' Summed.list = Summed.list, search.par = search.par, BLANK = FALSE, IonMode =
#' "Positive")
#' nrow(Peak.list) - nrow(test) ##Combines multiple features into single entries per metabolite
combine_phenodata <- function(Sample.df, Peak.list, Summed.list, search.par, QC.id, BLANK, IonMode) {

  #Set Default Values
  if(missing(QC.id))
    QC.id <- "Pooled_QC_"

  mylist <- .gen_res(IonMode,search.par,Peak.list,Sample.df,BLANK,QC.id)
  Peaklist_corstat <- mylist[[1]]
  res <- mylist[[2]]

  # Creates a data frame with only the pheno data columns combined for each metabolite group
  pheno.list <- Peaklist_corstat[sapply(res, function(x) length(x) < 1)]  #Extracts all of the pheno columns for combining by metabolite group
  pheno.list[,"metabolite_group"] <- Peaklist_corstat[,"metabolite_group"]
  pheno.list <- pheno.list %>% mutate_if(is.factor, as.character)
  mylist <- sort(unique(Peaklist_corstat$metabolite_group))
  new.pheno.list <- as.data.frame(mylist)
  # i = 11 ##for debugging purposes
  total = length(colnames(pheno.list))
  cat("\n\nCombining metadata for isotopes and adducts.\n\n\n")
  pb <- txtProgressBar(min = 0, max = total, style = 3)

  for (i in 1:total) {
    if (is.character(pheno.list[, i])) {
      # works new.pheno.list[,colnames(pheno.list)[i]] <- ddply(pheno.list, .(metabolite_group), summarise,
      # X=paste0(MS.ID, collapse = ';'))[2]
      new.pheno.list[, colnames(pheno.list)[i]] <- ddply(pheno.list, ~metabolite_group, function(x) .mypaste(x,
                                                                                                            i))[2]
    } else if (!is.na(.isWhole(pheno.list[, i]))) {
      new.pheno.list[, colnames(pheno.list)[i]] <- ddply(pheno.list, ~metabolite_group, function(x) .mypaste(x,
                                                                                                            i))[2]
    }
    setTxtProgressBar(pb, i)
  }

  # Create original pheno list to be replaced once combined with the summed results
  Iso.only.list <- subset(pheno.list, pheno.list$Correlation.stat == 1)  #extracts only the prime feature for each metabolite group across all columns

  # Replace pheno columns with combined phenotype data
  new.pheno.list <- new.pheno.list[, -which(colnames(new.pheno.list) %in% "metabolite_group")]
  nm <- intersect(colnames(new.pheno.list), colnames(Iso.only.list))
  colnames(new.pheno.list) <- c("metabolite_group", colnames(new.pheno.list)[-1])

  new.pheno.list[["MS.ID"]][match(Iso.only.list$metabolite_group, new.pheno.list$metabolite_group)]
  Iso.only.list[nm] <- lapply(nm, function(x) new.pheno.list[[x]][match(Iso.only.list$metabolite_group, new.pheno.list$metabolite_group)])

  # Combine munged pheno columns with summed data
  df1 <- Iso.only.list[order(Iso.only.list$metabolite_group), ]
  df2 <- Summed.list[order(Summed.list$metabolite_group), ] %>% select(-c("metabolite_group"))
  Peak.list.summed <- bind_cols(df1, df2)
  temp <- as.character(Peak.list.summed$MS.ID)

  #Drop Singletons
  no.features <- str_count(temp, ";") + 1
  drop.singletons = !search.par[1, "keep.singletons"]
  if (drop.singletons & BLANK == FALSE) {
    drops <- grep(1, no.features, value = FALSE)
    temp <- Peak.list.summed[drops, ]

    if(!0 %in% dim(temp)) {

      Peak.list.summed <- Peak.list.summed[-drops, ]

    }
  }

  #Combine monomolecular mass values into a single value
  myMonoMass <- Peak.list.summed$mono_mass
  allMonoMass <- strsplit(myMonoMass, split = ";")
  allMonoMass <- lapply(allMonoMass, function(x) as.numeric(x))
  meanMonoMass <- lapply(allMonoMass, function(x) mean(x))
  Peak.list.summed$mono_mass <- unlist(meanMonoMass)

  #Combine rt values into a single value
  myRT <- Peak.list.summed$rt
  allRT <- strsplit(myRT, split = ";")
  allRT <- lapply(allRT, function(x) as.numeric(x))
  meanRT <- lapply(allRT, function(x) mean(x))
  Peak.list.summed$meanRT <- unlist(meanRT)

  return(Peak.list.summed)
}
USEPA/LUMA documentation built on Aug. 29, 2020, 1:40 p.m.