R/rampReactionQueries.R

Defines functions checkReactionInputIds getReactionRheaURLs getReactionClassStatsOnAnalytes getReactionClassStats getReactionSourceIdsFromReactionQuery getReactionParticpantCounts getRampSourceInfoFromAnalyteIDs combineStringLists getRheaMetabolitesForProteins getRheaEnzymesAndTransportersForMetabolites getRheaAnalyteReactionAssociations getReactionDetails getReactionParticipants getReactionClassesForAnalytes getReactionsForRaMPGeneIds getReactionsForRaMPCompoundIds getReactionsForSourceProteinIds getReactionsForSourceCompoundIds getReactionsForAnalytes

Documented in combineStringLists getRampSourceInfoFromAnalyteIDs getReactionClassesForAnalytes getReactionClassStats getReactionClassStatsOnAnalytes getReactionDetails getReactionParticipants getReactionParticpantCounts getReactionRheaURLs getReactionsForAnalytes getReactionsForRaMPCompoundIds getReactionsForRaMPGeneIds getReactionsForSourceCompoundIds getReactionsForSourceProteinIds getReactionSourceIdsFromReactionQuery getRheaAnalyteReactionAssociations getRheaEnzymesAndTransportersForMetabolites getRheaMetabolitesForProteins

# RaMP Reaction Queries

#' getReactionsForAnalytes
#'
#' @param db a RaMP databse object
#' @param analytes list of analytes
#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
#' @param rxnDirs character vector of length > 1, specifying reaction directions to return  c("UN", "LR", "RL", "BD", "ALL"), default = c("UN").
#' @param includeRxnURLs if TRUE, urls to Rhea.org will be delivered in the result dataframe for each reaction
#'
#' @return a list of reaction information on each input analyte, separate data.frame for metabolites, genes, and common reactions
#' @export
#'
getReactionsForAnalytes <- function(db = RaMP(), analytes, onlyHumanMets=F, humanProtein=T, includeTransportRxns=F, rxnDirs=c("UN"), includeRxnURLs=F) {

  message("Running getReactionsForAnalytes()")

  genes <- data.frame()
  metabolites <- data.frame()
  mdf <- data.frame()
  gdf <- data.frame()

  idLists = checkReactionInputIds("getReactionsForAnalytes", analytes)

  if((length(idLists$proteinIds) + length(idLists$metIds) == 0)) {
    message("Wrong ID types. chebi and/or uniprot ids are required. Returning empty dataframe.")
    return(data.frame())
  }

  mets <- idLists$metIds
  proteins <- idLists$proteinIds


  if(!is.null(mets) && length(mets)>0) {
    mdf <- RaMP:::getReactionsForSourceCompoundIds(db = db, compoundIds=mets, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns=includeTransportRxns, rxnDirs=rxnDirs)
  }

  if(!is.null(proteins) && length(proteins)>0) {
    gdf <- RaMP:::getReactionsForSourceProteinIds(db = db, proteinIds=proteins, onlyHumanMets=onlyHumanMets, humanProtein=humanProtein, includeTransportRxns)
  }

  resultList <- list()
  resultList[['met2rxn']] <- mdf
  resultList[['prot2rxn']] <- gdf
  resultList[['metProteinCommonReactions']] <- data.frame()

  # find common reactions
  if(nrow(mdf) > 0 && nrow(gdf) > 0) {
    mRampRxnIds <- unlist(mdf$rxn_source_id)
    gRampRxnIds <- unlist(gdf$rxn_source_id)
    commonRxnIds <- intersect(mRampRxnIds, gRampRxnIds)
    if(length(commonRxnIds) > 0) {
      mRxn <- mdf[mdf$rxn_source_id %in% commonRxnIds,]
      gRxn <- gdf[gdf$rxn_source_id %in% commonRxnIds,]

      # one row per reaction, include concat mets, concat proteins
      # probably a more streamlined way to do this...
      commonRampRxnList <- c()
      commonRxnList <- c()
      mSourceIds <- c()
      mRxnSourceNames <- c()
      gRxnSourceIds <- c()
      gRxnSourceName <- c()
      gRxnUniprot <- c()
      commonRxnLabel <- c()
      commonRxnHTMLEq <- c()
      hasHumanProtein <- c()
      onlyHumanMets <- c()
      isTransport <- c()
      rxnDir <- c()
      for(rxn in commonRxnIds) {
        commonRampRxnList <- c(commonRampRxnList, rxn)
        commonRxnList <- c(commonRxnList, paste0(unique(mRxn$rxn_source_id[mRxn$rxn_source_id == rxn]), collapse = ','))
        mSourceIds <- c(mSourceIds, paste0(unique(mRxn$met_source_id[mRxn$rxn_source_id == rxn]), collapse = ','))
        mRxnSourceNames <- c(mRxnSourceNames, paste0(unique(mRxn$met_name[mRxn$rxn_source_id == rxn]), collapse = ','))

        #gRxnSourceIds <- c(gRxnSourceIds, paste0(unique(gRxn$sourceId[gRxn$rxn_source_id == rxn]), collapse = ','))
        gRxnUniprot <- c(gRxnUniprot, paste0(unique(gRxn$uniprot[gRxn$rxn_source_id == rxn]), collapse = ','))
        gRxnSourceName <- c(gRxnSourceName, paste0(unique(gRxn$protein_name[gRxn$rxn_source_id == rxn]), collapse = ','))

        rxnDir <- c(rxnDir, paste0(unique(mRxn$direction[mRxn$rxn_source_id == rxn]), collapse = ','))

        commonRxnLabel <- c(commonRxnLabel, paste0(unique(mRxn$label[mRxn$rxn_source_id == rxn]), collapse = ','))
        commonRxnHTMLEq <- c(commonRxnHTMLEq, paste0(unique(mRxn$html_equation[mRxn$rxn_source_id == rxn]), collapse = ','))

        isTransport <- as.integer(c(isTransport, paste0(unique(mRxn$is_transport[mRxn$rxn_source_id == rxn]), collapse = ',')))
        hasHumanProtein <- as.integer(c(hasHumanProtein, paste0(unique(mRxn$has_human_prot[mRxn$rxn_source_id == rxn]), collapse = ',')))
        justHumanMets <- as.integer(c(onlyHumanMets, paste0(unique(mRxn$only_human_mets[mRxn$rxn_source_id == rxn]), collapse = ',')))
      }
      commonReactions <- data.frame(list('metabolites' = mSourceIds, 'met_names' = mRxnSourceNames,
                                         'uniprot' = gRxnUniprot, 'proteinNames' = gRxnSourceName,
                                         'reactionId' = commonRxnList, 'rxn_dir' = rxnDir,'rxn_label' = commonRxnLabel,
                                         'rxn_html_label' = commonRxnHTMLEq,
                                         'is_transport' = isTransport,
                                         'has_human_protein' = hasHumanProtein,
                                         'only_human_mets' = justHumanMets))



      resultList[['metProteinCommonReactions']] <- commonReactions
    }
  }

  # let's add analyte hits per pathway to help sort by rxns with greater support
  if(nrow(resultList$met2rxn) > 0) {
    metCountDf <- data.frame(table(resultList$met2rxn$rxn_source_id))
    colnames(metCountDf) <- c("rxn_source_id", "metHitCount")
    resultList$met2rxn <- merge(x=resultList$met2rxn, y=metCountDf,by.x="rxn_source_id", by.y="rxn_source_id")
    resultList$met2rxn <- resultList$met2rxn[order(-resultList$met2rxn$metHitCount, resultList$met2rxn$rxn_source_id, resultList$met2rxn$substrate_product),]
    colnames(resultList$met2rxn) <- c("reactionId", "metSourceId", "substrateProductFlag", "isCofactor", "metName", 'isTransport', "label", "direction", "equation", "htmlEquation", "ecNumber",
                                      "hasHumanProtein", "onlyHumanMets", "metHitCount")
    resultList$met2rxn <- resultList$met2rxn[,c(1,14,2, 5, 3, 4, 7, 9, 10, 8, 11, 6, 12:13)]
    if(includeRxnURLs) {
      resultList$met2rxn$rheaRxnURL <- getReactionRheaURLs(unlist(resultList$met2rxn$reactionId))
    }
  }

  if(nrow(resultList$prot2rxn) > 0) {
    colnames(resultList$prot2rxn) <- c("uniprot", "proteinName", "reactionId", "isTransport", "label", "direction", "equation", "htmlEquation", "ecNumber", "hasHumanProtein", "onlyHumanMets")
    resultList$prot2rxn <- resultList$prot2rxn[,c(3, 1:2, 5, 7, 6, 9, 4, 10, 11)]
    if(includeRxnURLs) {
      resultList$prot2rxn$rheaRxnURL <- getReactionRheaURLs(unlist(resultList$prot2rxn$reactionId))
    }
  }

  if(nrow(resultList$metProteinCommonReactions) > 0) {
    colnames(resultList$metProteinCommonReactions) <- c("metabolites", "metNames", "uniprot", "proteinName", "reactionId", "direction", "label", "htmlLabel", "isTransport", "hasHumanProtein", "onlyHumanMets")
    resultList$metProteinCommonReactions <- resultList$metProteinCommonReactions[,c(5, 1:4, 7:8, 6, 9:11)]
    if(includeRxnURLs) {
      resultList$metProteinCommonReactions$rheaRxnURL <- getReactionRheaURLs(unlist(resultList$metProteinCommonReactions$reactionId))
    }
  }

  message("Finished getReactionsForAnalytes()")

  return(resultList)
}


#' getReactionsForSourceCompoundIds returns reactions for a collection of ChEBI input Ids
#'
#' @param db RaMP object
#' @param compoundIds list of ChEBI compound ids (prefixed with chebi:)
#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
#'
#' @return returns a dataframe of reaction information for each ramp compound id
#'
getReactionsForSourceCompoundIds <- function(db = RaMP(), compoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {

  idStr <- listToQueryString(compoundIds)
  query <- paste0("select mr.met_source_id, mr.substrate_product, mr.is_cofactor, mr.met_name,
  rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets
  from reaction2met mr, reaction rxn
  where mr.met_source_id in (",idStr,") and rxn.rxn_source_id = mr.rxn_source_id")

  if(length(rxnDirs) == 1) {
    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
  } else if(length(rxnDirs)>1) {
    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
  } else {
    print("rxnDirs must be of length > 0")
  }

  if(humanProtein) {
    query <- paste0(query," and rxn.has_human_prot = 1")
  }

  if(onlyHumanMets) {
    query <- paste0(query, " and rxn.only_human_mets = 1")
  }

  if(!includeTransportRxns) {
    query <- paste0(query, " and rxn.is_transport = 0")
  }

  df <- RaMP::runQuery(query, db)

  return(df)
}



#' getReactionsForSourceProteinIds returns reactions for a collection of source input compound ids
#'
#' @param db RaMP object
#' @param proteinIds list uniprot accessions with prefix (uniprot:)
#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
#'
#' @return returns a dataframe of reaction information for each ramp compound id
#'
getReactionsForSourceProteinIds <- function(db = RaMP(), proteinIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {

  idStr <- listToQueryString(proteinIds)
  query <- paste0("select gr.uniprot, gr.protein_name, rxn.rxn_source_id,
                  rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation,
                  rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets from reaction2protein gr,
                  reaction rxn where gr.uniprot in (",idStr,") and rxn.rxn_source_id = gr.rxn_source_id")

  if(length(rxnDirs) == 1) {
    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
  } else if(length(rxnDirs)>1) {
    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
  } else {
    print("rxnDirs must be of length > 0")
  }

  if(humanProtein) {
    query <- paste0(query, " and rxn.has_human_prot = 1")
  }

  if(onlyHumanMets) {
    query <- paste0(query, " and rxn.only_human_mets = 1")
  }

  if(!includeTransportRxns) {
    query <- paste0(query, " and rxn.is_transport = 0")
  }

  df <- RaMP::runQuery(query, db)

  return(df)
}



#' getReactionsForRaMPCompoundIds returns reactions for a collection of RaMP compound ids
#'
#' @param db RaMP object
#' @param rampCompoundIds list of ramp compound ids
#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
#'
#' @return returns a dataframe of reaction information for each ramp compound id
#'
getReactionsForRaMPCompoundIds <- function(db = RaMP(), rampCompoundIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {

  idStr <- listToQueryString(rampCompoundIds)
  query <- paste0("select mr.ramp_rxn_id, mr.ramp_cmpd_id, mr.met_source_id, mr.substrate_product, mr.is_cofactor, mr.met_name,
  rxn.rxn_source_id, rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation, rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets
  from reaction2met mr, reaction rxn
  where mr.ramp_cmpd_id in (",idStr,") and rxn.ramp_rxn_id = mr.ramp_rxn_id")

  if(length(rxnDirs) == 1) {
    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
  } else if(length(rxnDirs)>1) {
    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
  } else {
    print("rxnDirs must be of length > 0")
  }

  if(humanProtein) {
    query <- paste0(query," and rxn.has_human_prot = 1")
  }

  if(onlyHumanMets) {
    query <- paste0(query, " and rxn.only_human_mets = 1")
  }

  if(!includeTransportRxns) {
    query <- paste0(query, " and rxn.is_transport = 0")
  }

  df <- RaMP::runQuery(query, db)

  return(df)
}



#' getReactionsForRaMPGeneIds returns reactions for a collection of ramp compound ids
#'
#' @param rampGeneIds list of ramp compound ids
#' @param onlyHumanMets boolean to only return pathways containing only human metabolites (ChEBI ontology) (dev in progress)
#' @param humanProtein boolean to only control pathways catalyzed by a human proteins (having human Uniprot) (dev in progress)
#' @param includeTransportRxns if TRUE, returns metabolic and transport reactions
#'
#' @return returns a dataframe of reaction information for each ramp compound id
#'
getReactionsForRaMPGeneIds <- function(db = RaMP(), rampGeneIds, onlyHumanMets=F, humanProtein=F, includeTransportRxns=F, rxnDirs=c("UN")) {

  idStr <- listToQueryString(rampGeneIds)
  query <- paste0("select gr.ramp_rxn_id, gr.ramp_gene_id, gr.uniprot, gr.protein_name, rxn.rxn_source_id,
                  rxn.is_transport, rxn.label, rxn.direction, rxn.equation, rxn.html_equation,
                  rxn.ec_num, rxn.has_human_prot, rxn.only_human_mets from reaction2protein gr,
                  reaction rxn where gr.ramp_gene_id in (",idStr,") and rxn.ramp_rxn_id = gr.ramp_rxn_id")

  if(length(rxnDirs) == 1) {
    query <- paste0(query, " and rxn.direction = '",rxnDirs[1],"'")
  } else if(length(rxnDirs)>1) {
    query <- paste0(query, " and rxn.direction in (",listToQueryString(rxnDirs),")")
  } else {
    print("rxnDirs must be of length > 0")
  }

  if(humanProtein) {
    query <- paste0(query, " and rxn.has_human_prot = 1")
  }

  if(onlyHumanMets) {
    query <- paste0(query, " and rxn.only_human_mets = 1")
  }

  if(!includeTransportRxns) {
    query <- paste0(query, " and rxn.is_transport = 0")
  }

  df <- RaMP::runQuery(query, db)

  return(df)
}


#' getReactionClassesForAnalytes returns reactions class and EC numbers for a collection of input compound ids
#'
#' @param db a RaMP database object
#' @param analytes list of analyte ids
#' @param multiRxnParticipantCount minimum number of analytes to report a reaction class, default = 1
#' @param humanProtein require reactions to have a human protein (enzyme or transporter), default True
#' @param concatResults returns all reaction class levels in one dataframe rather than a list of 3 dataframes
#' @param includeReactionIDs adds the list of reaction ids for each reaction class
#' @param useIdMapping allows one to fuzzy match input ids to chebi and uniprot to support reaction queries.
#'
#' @return returns a three dataframes of reaction EC classe information, one for each EC level
#' @export
 getReactionClassesForAnalytes <- function(db = RaMP(), analytes, multiRxnParticipantCount=1, humanProtein=TRUE,
                                           concatResults=F, includeReactionIDs=F, useIdMapping = F) {

  print("Starting reaction class query...")

  if(!useIdMapping) {
    ids = RaMP:::checkReactionInputIds("getReactionClassesForAnalytes", analytes)
    analytes = c(ids$metIds, ids$proteinIds)

    if(length(analytes) == 0) {
      message("None of the ids are chebi or uniprot ids.")
      message("Returning empty list. Use the parameter useIDMapping = T to enable id mapping to chebi and uniprot ids.")
      return(list())
    }
  }

  analytesStr <- RaMP:::listToQueryString(analytes)

  # base reaction stats, reactions per reaction class
  rxnClassStats <- RaMP:::getReactionClassStats(db, humanProtein)

  # base reaction stats for analytes, distinct analyte counts (proteins or mets) per reaction class
  analyteClassStats <- RaMP:::getReactionClassStatsOnAnalytes(db, humanProtein)

  # need to be careful to enforce request for only reactions with a human protein
  humanProteinArg = ''
  if(humanProtein) {
    humanProteinArg = ' and rxn.has_human_prot = 1'
  }

  # if we want to enforce reactions that have more than
  # one participant in the input list
  #
  # a bit more work to query direct analyte to reaction ids
  # who would do it like this? :) (eye roll)
  if(useIdMapping) {
    if(multiRxnParticipantCount > 1) {

      analyte2Rxn = RaMP::getReactionsForAnalytes(db=db, analytes=analytes, humanProtein = humanProtein)
      rxnParticipantData <- RaMP:::getReactionParticpantCounts(analyte2Rxn, multiRxnParticipantCount)

      if(rxnParticipantData[['total_rxns_retained']] == 0) {

        message("The input analytes map to ", rxnParticipantData$total_mapped_rxns," reactions.")
        message("The input analytes to reaction mapping do not support the multiRxnParticipantCount cutoff of ",multiRxnParticipantCount, ".")
        message("Returing empty result. Check input id prefix format and possibley reduce multiRxnParticipantCount cutoff.")

        if(concatResults) {
          return(data.frame())
        }
        else {
          return(list())
        }
      }

      # reaction list is already filtered
      keeperRxns <- unlist(rxnParticipantData$merged_rxn2analyte_count$rxn_source_id)
      keeperRxnStr <- RaMP:::listToQueryString(keeperRxns)

      metQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.ramp_cmpd_id)) as met_count, group_concat(distinct(r.rxn_source_id)) as met_reactions
        from reaction_ec_class c, reaction2met r, source s, reaction rxn
        where s.sourceId in (",analytesStr,") and r.rxn_source_id in (",keeperRxnStr,")
        and r.ramp_cmpd_id = s.rampId
        and c.rxn_source_id = r.rxn_source_id
        and rxn.rxn_source_id = r.rxn_source_id",
                         humanProteinArg,
                         " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
        order by ec_level asc, rxn_count desc")

      metResult <- RaMP::runQuery(sql=metQuery, db = db)

      proteinQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.ramp_gene_id)) as protein_count, group_concat(distinct(r.rxn_source_id)) as protein_reactions
          from reaction_ec_class c, reaction2protein r, source s, reaction rxn
          where s.sourceId in (",analytesStr,") and r.rxn_source_id in (",keeperRxnStr,")
          and r.ramp_gene_id = s.rampId
          and c.rxn_source_id = r.rxn_source_id
          and rxn.rxn_source_id = r.rxn_source_id",
                             humanProteinArg,
                             " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
          order by ec_level asc, rxn_count desc")

      proteinResult <- RaMP::runQuery(sql=proteinQuery, db=db)

    } else {

      metQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count,
        count(distinct(r.ramp_cmpd_id)) as met_count, group_concat(distinct(r.rxn_source_id))
        as met_reactions from reaction_ec_class c, reaction2met r, source s, reaction rxn
        where s.sourceId in (",analytesStr,")
        and r.ramp_cmpd_id = s.rampId
        and c.rxn_source_id = r.rxn_source_id
        and rxn.rxn_source_id = r.rxn_source_id",
                         humanProteinArg,
                         " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
        order by ec_level asc, rxn_count desc")

      metResult <- RaMP::runQuery(sql=metQuery, db = db)

      proteinQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.ramp_gene_id)) as protein_count, group_concat(distinct(r.rxn_source_id)) as protein_reactions
          from reaction_ec_class c, reaction2protein r, source s, reaction rxn
          where s.sourceId in (",analytesStr,")
          and r.ramp_gene_id = s.rampId
          and c.rxn_source_id = r.rxn_source_id
          and rxn.rxn_source_id = r.rxn_source_id",
                             humanProteinArg,
                             " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
          order by ec_level asc, rxn_count desc")

      proteinResult <- RaMP::runQuery(sql=proteinQuery, db=db)
    }

    metRxns <- metResult[,c('rxn_class', 'rxn_class_hierarchy', 'met_reactions')]
    proteinRxns <- proteinResult[,c('rxn_class_hierarchy', 'protein_reactions')]

    mergedRxnData <- merge(x=metRxns, y=proteinRxns, by.x='rxn_class_hierarchy', by.y='rxn_class_hierarchy', all.x=T, all.y=T)
    mergedRxnData$met_reactions[is.na(mergedRxnData$met_reactions)] <- ""
    mergedRxnData$protein_reactions[is.na(mergedRxnData$protein_reactions)] <- ""

    if(nrow(mergedRxnData) == 0) {

      message("The input analytes map to 0 reactions.")
      message("Returing empty result. Check input id prefix format.")

      if(concatResults) {
        return(data.frame())
      }
      else {
        return(list())
      }
    }

  } else {

    # not using the source table and ID mapping
    if(multiRxnParticipantCount > 1) {

      analyte2Rxn = RaMP::getReactionsForAnalytes(db=db, analytes=analytes, humanProtein = humanProtein)
      rxnParticipantData <- RaMP:::getReactionParticpantCounts(analyte2Rxn, multiRxnParticipantCount)

      if(rxnParticipantData[['total_rxns_retained']] == 0) {

        message("The input analytes map to ", rxnParticipantData$total_mapped_rxns," reactions.")
        message("The input analytes to reaction mapping do not support the multiRxnParticipantCount cutoff of ",multiRxnParticipantCount, ".")
        message("Returing empty result. Check input id prefix format and possibley reduce multiRxnParticipantCount cutoff.")

        if(concatResults) {
          return(data.frame())
        }
        else {
          return(list())
        }
      }

      # reaction list is already filtered
      keeperRxns <- unlist(rxnParticipantData$merged_rxn2analyte_count$rxn_source_id)
      keeperRxnStr <- RaMP:::listToQueryString(keeperRxns)

      metQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.met_source_id)) as met_count, group_concat(distinct(r.rxn_source_id)) as met_reactions
        from reaction_ec_class c, reaction2met r, reaction rxn
        where r.met_source_id in (",analytesStr,") and r.rxn_source_id in (",keeperRxnStr,")
        and c.rxn_source_id = r.rxn_source_id
        and rxn.rxn_source_id = r.rxn_source_id",
                         humanProteinArg,
                         " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
        order by ec_level asc, rxn_count desc")

      metResult <- RaMP::runQuery(sql=metQuery, db = db)

      proteinQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.uniprot)) as protein_count, group_concat(distinct(r.rxn_source_id)) as protein_reactions
          from reaction_ec_class c, reaction2protein r, source s, reaction rxn
          where r.uniprot in (",analytesStr,") and r.rxn_source_id in (",keeperRxnStr,")
          and c.rxn_source_id = r.rxn_source_id
          and rxn.rxn_source_id = r.rxn_source_id",
                             humanProteinArg,
                             " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
          order by ec_level asc, rxn_count desc")

      proteinResult <- RaMP::runQuery(sql=proteinQuery, db=db)

    } else {

      metQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count,
        count(distinct(r.met_source_id)) as met_count, group_concat(distinct(r.rxn_source_id))
        as met_reactions from reaction_ec_class c, reaction2met r, reaction rxn
        where r.met_source_id in (",analytesStr,")
        and c.rxn_source_id = r.rxn_source_id
        and rxn.rxn_source_id = r.rxn_source_id",
                         humanProteinArg,
                         " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
        order by ec_level asc, rxn_count desc")

      metResult <- RaMP::runQuery(sql=metQuery, db = db)

      proteinQuery <- paste0("select distinct c.rxn_class, c.rxn_class_hierarchy, c.rxn_class_ec, c.ec_level, count(distinct(r.rxn_source_id)) as rxn_count, count(distinct(r.uniprot)) as protein_count, group_concat(distinct(r.rxn_source_id)) as protein_reactions
          from reaction_ec_class c, reaction2protein r, reaction rxn
          where r.uniprot in (",analytesStr,")
          and c.rxn_source_id = r.rxn_source_id
          and rxn.rxn_source_id = r.rxn_source_id",
                             humanProteinArg,
                             " group by c.rxn_class_hierarchy, c.rxn_class, c.rxn_class_ec, c.ec_level
          order by ec_level asc, rxn_count desc")

      proteinResult <- RaMP::runQuery(sql=proteinQuery, db=db)
    }

    metRxns <- metResult[,c('rxn_class', 'rxn_class_hierarchy', 'met_reactions')]
    proteinRxns <- proteinResult[,c('rxn_class_hierarchy', 'protein_reactions')]

    mergedRxnData <- merge(x=metRxns, y=proteinRxns, by.x='rxn_class_hierarchy', by.y='rxn_class_hierarchy', all.x=T, all.y=T)
    mergedRxnData$met_reactions[is.na(mergedRxnData$met_reactions)] <- ""
    mergedRxnData$protein_reactions[is.na(mergedRxnData$protein_reactions)] <- ""

    if(nrow(mergedRxnData) == 0) {

      message("The input analytes map to 0 reactions.")
      message("Returing empty result. Check input id prefix format.")

      if(concatResults) {
        return(data.frame())
      }
      else {
        return(list())
      }
    }

  }


  mergedRxnData$rxn_count <- 0

  for(i in 1:nrow(mergedRxnData)) {
    mergedRxnData$rxn_count[i]<- length(union(unlist(strsplit(mergedRxnData$met_reactions[i],',')), unlist(strsplit(mergedRxnData$protein_reactions[i],','))))
  }

  combinedResult <- merge(x=metResult, y=proteinResult, by.x=c('rxn_class_hierarchy', 'rxn_class', 'rxn_class_ec', 'ec_level'), by.y=c('rxn_class_hierarchy', 'rxn_class', 'rxn_class_ec', 'ec_level'), all.x=T, all.y=T)
  combinedResult$rxn_count.x[is.na(combinedResult$rxn_count.x)] <- 0
  combinedResult$rxn_count.y[is.na(combinedResult$rxn_count.y)] <- 0

  combinedResult$met_count[is.na(combinedResult$met_count)] <- 0
  combinedResult$protein_count[is.na(combinedResult$protein_count)] <- 0
  combinedResult$protein_reactions[is.na(combinedResult$protein_reactions)] <- ""
  combinedResult$met_reactions[is.na(combinedResult$met_reactions)] <- ""

  rxnStats <- RaMP:::combineStringLists(combinedResult$met_reactions, combinedResult$protein_reactions)

  combinedResult <- cbind(combinedResult, rxnStats)

  combo <- combinedResult[, (!colnames(combinedResult) %in% c("rxn_count.x", "rxn_count.y", "met_count.y", "met_reactions", "protein_reactions"))]

  combo <- combo[order(combo$ec_level, combo$rxn_count, combo$met_count, combo$protein_count, decreasing = T),]

  # now we need to merge in overall reaction and analyte stats
  combo <- merge(x=combo, y=analyteClassStats[['metStats']], by.x='rxn_class_hierarchy', by.y='rxn_class_hierarchy', all.x=F, all.y=F)
  combo <- merge(x=combo, y=analyteClassStats[['protStats']], by.x='rxn_class_hierarchy', by.y='rxn_class_hierarchy', all.x=T, all.y=F)
  combo <- merge(x=combo, y=rxnClassStats, by.x=c('rxn_class', 'rxn_class_hierarchy'), by.y=c('rxn_class', 'rxn_class_hierarchy'), all.x=T, all.y=F)




  # reorder columns...
  if(includeReactionIDs) {
    combo <- combo[,c(1,2,4,5,6,13,7,16,8,19,9)]
    colnames(combo) = c("rxnClass", "rxnClassHierarcy", "ecNumber", "classLevel", "metCount", "totalMetsInRxnClass",
                        "proteinCount", "totalProteinsInRxnClass", "reactionCount", "totalRxnsInClass", "reactionIds")
  } else {
    combo <- combo[,c(1,2,4,5,6,13,7,16,8,19)]
    colnames(combo) = c("rxnClass", "rxnClassHierarcy", "ecNumber", "classLevel", "metCount", "totalMetsInRxnClass",
                        "proteinCount", "totalProteinsInRxnClass", "reactionCount", "totalRxnsInClass")
  }


  # need to include level 1 stats for all classes
  levelOneClasses = unlist(combo[combo$classLevel == 1,]$rxnClass)
  mStats <- analyteClassStats$metStats
  pStats <- analyteClassStats$protStats

  mStats <- mStats[!(mStats$rxn_class %in% levelOneClasses) & mStats$stat_ec_level == 1,]
  pStats <- pStats[!(pStats$rxn_class %in% levelOneClasses) & pStats$stat_ec_level == 1,]
  rStats <- rxnClassStats[!(rxnClassStats$rxn_class %in% levelOneClasses) & rxnClassStats$stat_ec_level == 1,]

  # only add extra stats if there are level 1 classes that are not hit by analytes
  if(nrow(mStats > 0)) {
    extraStats <- merge(x=mStats, y=pStats, by.x="rxn_class_hierarchy", by.y="rxn_class_hierarchy", all.x=T, all.y=T, no.dups=T)
    extraStats <- merge(x=extraStats, y=rStats, by.x="rxn_class_hierarchy", by.y="rxn_class_hierarchy", all.x=T, all.y=T, no.dups=T)

    extraStats <- extraStats[,c(2,1,4,3,5,9,13)]

    extraStats$metCount <- 0
    extraStats$proteinCount <- 0
    extraStats$reactionCount <- 0

    if(includeReactionIDs) {
      extraStats$reactionIds <- ""

      extraStats <- extraStats[,c(1:4,8,5,9,6,10,7,11)]
      colnames(extraStats) <- c(c("rxnClass", "rxnClassHierarcy", "ecNumber", "classLevel", "metCount", "totalMetsInRxnClass",
                                  "proteinCount", "totalProteinsInRxnClass", "reactionCount", "totalRxnsInClass", "reactionIds"))
    } else {
      extraStats <- extraStats[,c(1:4,8,5,9,6,10,7)]
      colnames(extraStats) <- c(c("rxnClass", "rxnClassHierarcy", "ecNumber", "classLevel", "metCount", "totalMetsInRxnClass",
                                  "proteinCount", "totalProteinsInRxnClass", "reactionCount", "totalRxnsInClass"))
    }

    combo <- rbind(combo, extraStats)
  }


#  analyteNullStats <- analyteClassStats$metStats[analyteClassStats$metStats$rxn_class]
#  nullClassStats <-

  c2 <- combo
  c3 <- c2[,1:3]
  c4 <- c2[,4:10]
  c4 <- data.frame(sapply(c4, as.integer))

  combo[,4:10] <- sapply(combo[,4:10], as.integer)


  if(!concatResults) {
    result <- list()
    result[['class_ec_level_1']] <- combo[combo$classLevel == 1,]
    result[['class_ec_level_2']] <- combo[combo$classLevel == 2,]
    result[['class_ec_level_3']] <- combo[combo$classLevel == 3,]
    result[['class_ec_level_4']] <- combo[combo$classLevel == 4,]
  } else {
    result = combo
    result = result[with(result, order(classLevel, -reactionCount)),]
  }

  print("Completed reaction class query...")

  return(result)
}


#' getReactionParticipants returns protein information for a list of reaction ids.
#' This utility method can help extend information from previous queries.
#' For instance, if a user queries for reactions related to a list of metabolites,
#' this method can be used to return proteins on some subset of reaction ids to find related proteins.
#'
#' @param db a RaMP databse object
#' @param reactionList list of reaction ids
#'
#' @return returns a dataframe object with reaction to reaction participant mappings.
#' @export
getReactionParticipants <- function(db = RaMP(), reactionList = c()) {
  reactionListStr <- listToQueryString(reactionList)


  sql = paste0('select rm.rxn_source_id as reaction_id, rm.met_source_id as participant_id, rm.met_name as participant_name, rm.is_cofactor as is_cofactor, rm.substrate_product as is_product, cp.iso_smiles as iso_smiles ',
               'from reaction2met rm, chem_props cp ',
               'where rxn_source_id in (', reactionListStr,") and cp.chem_source_id = rm.met_source_id;")

  metResult <- runQuery(sql = sql, db=db)
  metResult$participant_role <- 'substrate'
  metResult$participant_role_id <- 3
  metResult$participant_role[metResult$is_product == 1] <- 'product'
  metResult$participant_role_id[metResult$is_product == 1] <- 4
  metResult$participant_role[metResult$is_cofactor == 1] <- 'cofactor'
  metResult$participant_role_id[metResult$is_cofactor == 1] <- 2

  metResult$reaction_type <- 'biochemical'

  sql = paste0('select rxn_source_id as reaction_id, uniprot as participant_id, protein_name as participant_name from reaction2protein where rxn_source_id in (', reactionListStr,");")

  proteinResult <- runQuery(sql = sql, db=db)
  proteinResult$is_cofactor <- NA
  proteinResult$is_product <- NA
  proteinResult$iso_smiles <- NA
  proteinResult$participant_role <- 'enzyme'
  proteinResult$participant_role_id <- '1'

  proteinResult$reaction_type <- 'biochemical'

  sql = paste0('select rxn_source_id, is_transport from reaction where rxn_source_id in (',reactionListStr,') and is_transport = 1')

  rxnResult <- runQuery(sql = sql, db=db)

  if(nrow(rxnResult) > 0) {
    transportRxns <- unique(unlist(rxnResult$rxn_source_id))
    proteinResult$participant_role[proteinResult$reaction_id %in% transportRxns] <- 'transporter'

    # set reaction type
    proteinResult$reaction_type[proteinResult$reaction_id %in% transportRxns] <- 'transport'
    metResult$reaction_type[metResult$reaction_id %in% transportRxns] <- 'transport'
  }

  result <- rbind(metResult, proteinResult)

  result <- result[with(result, order(reaction_id, participant_role_id)),]

  # reorder columns
  colOrder <- c(1,9, 7, 2, 3, 6)
  result <- result[,colOrder]

  colnames(result) <-  c("reactionId", "reactionType", "participantRole", "participantId", "participantName", "isoSmiles" )

  return(result)
}

#' getReactionDetails returns general reaction information for a list of reaction ids.
#' This utility methed can help extend information from previous queries.
#' For instance, if a user queries for reactions related to a list of analytes, or filtered on reactions,
#' this method can be used to return general reaction info on some subset of reaction ids of interest.
#'
#' @param db a RaMP databse object
#' @param reactionList list of reaction ids
#'
#' @return returns a dataframe object with reaction information.
#' @export
getReactionDetails <- function(db = RaMP(), reactionList = c()) {
  reactionListStr <- listToQueryString(reactionList)
  sql = paste0('select rxn_source_id, direction, is_transport, has_human_prot, ec_num, label, equation, html_equation from reaction where rxn_source_id in (', reactionListStr,");")

  result <- runQuery(sql = sql, db=db)

  colnames(result) <- c("reactionId", "direction", "isTransport", "hasHumanProtein", "ecNumber", "label", "equation", "htmlEquation")

  return(result)
}


#' getAnalyteReactionAssociations returns mets associated with input proteins, and proteins associated with input metabolites
#' Associations are made through shared Rhea reactions
#'
#' @param db a RaMP database object
#' @param analytes list of analyte ids
#' @param includeRheaRxnDetails returns additional columns that inlucdes info in the reaction connecting mets and proteins
#' @param humanProteins require reactions to have a human protein (enzyme or transporter), default True
#'
#' @return returns a three dataframes of reaction EC classe information, one for each EC level
#' @export
getRheaAnalyteReactionAssociations <- function(db = RaMP(), analytes, includeRheaRxnDetails=F, humanProteins=T) {

    m2p <- RaMP:::getRheaEnzymesAndTransportersForMetabolites(db = db, analytes=analytes, includeRheaRxnDetails=includeRheaRxnDetails, humanProteins=humanProteins)
    p2m <- RaMP:::getRheaMetabolitesForProteins(db = db, analytes=analytes, includeRheaRxnDetails=includeRheaRxnDetails, humanProteins=humanProteins)

    if(nrow(m2p) > 0 && nrow(p2m) > 0) {
      result = rbind(m2p, p2m)
    } else if(nrow(m2p) > 0) {
      result = m2p
    } else {
      result = p2m
    }

    if(nrow(result) > 0) {
      if(!includeRheaRxnDetails) {
        result <- result[order(result$input_common_names, result$rxn_partner_ids),]
      } else {
        result <- result[order(result$reactionId, result$substrateProductFlag),]
      }
    }
    return(result)
}

#####################################################
###################################
#
# Supporting utility methods
#
##

#' getRheaEnzymesAndTransportersForMetabolites returns proteins that are either enzymes or transporters
#' that are reaction participants with the input metabolite ids.
#'
#' @param db a RaMP databse object
#' @param analytes analyte id vector
#' @param includeRheaRxnDetails returns additional columns that inlucdes info in the reaction connecting mets and proteins
#' @param humanProteins require reactions to have a human protein (enzyme or transporter), default True
#'
#' @return returns a dataframe object with input metabolites, and reaction and associated protein information.
getRheaEnzymesAndTransportersForMetabolites <- function(db = RaMP(), analytes, includeRheaRxnDetails=F, humanProteins=T) {

  chebiIds <- analytes[grepl("chebi", analytes)]

  if(length(chebiIds) == 0) {
    print("There are no ChEBI metabolite IDs in the input. Skipping metabolite to protein query step.")
    return(data.frame())
  }

  rxns <- getReactionsForAnalytes(db=db, analytes=chebiIds, humanProtein = humanProteins, includeTransportRxns = T)


  if(nrow(rxns$met2rxn) > 0) {
    reactions <- unique(unlist(rxns$met2rxn$reactionId))
    rxnParticipants <- getReactionParticipants(db=db, reactionList = reactions)
    rxnParticipants <- rxnParticipants[rxnParticipants$participantRole %in% c("enzyme", "transporter"),]

    rxns <- rxns$met2rxn[,c(2, 3, 4, 5,6, 1, 8)]

    result <- merge(x=rxns, y=rxnParticipants, by.x="reactionId", by.y="reactionId", all.x=T, all.y=T)

    if(!includeRheaRxnDetails) {
      keeperCols = c('metSourceId', 'metName', 'participantName', 'participantId')
      result <- result[, keeperCols]
      result$relation <- "met2protein"
      result <- result[,c(5,1,2,4,3)]
      colnames(result) <- c("query_relation", "input_analyte", "input_common_names", "rxn_partner_common_name", "rxn_partner_ids")
      result <- unique(result)
    } else {
      result$relation <- "met2protein"
      result <- result[,c(13, 3, 4, 10, 11, 9, 5, 1, 8, 7)]
      colnames(result) <- c("relation","inputId", "inputCommonName", "reactionPartnerId", "reactionPartnerCommonName", "partnerRole", "substrateProductFlag", "reactionId", "reactionType", "rxnEquation")
      result <- result[order(result$inputId, result$reactionPartnerCommonName, result$substrateProductFlag),]
      result <- unique(result)
    }

  } else {
    print("None of the input ids mapped to reactions in RaMP. Empty result.")
    result = data.frame()
  }

  return(result)
}


#' getRheaEnzymesAndTransportersForMetabolites returns proteins that are either enzymes or transporters
#' that are reaction participants with the input metabolite ids.
#'
#' @param db a RaMP databse object
#' @param analytes analyte id vector
#' @param includeRheaRxnDetails returns additional columns that inlucdes info in the reaction connecting mets and proteins
#' @param humanProteins require reactions to have a human protein (enzyme or transporter), default True
#'
#' @return returns a dataframe object with input metabolites, and reaction and associated protein informatin.
getRheaMetabolitesForProteins <- function(db = RaMP(), analytes, includeRheaRxnDetails=F, humanProteins=T) {

  uniprotIds <- analytes[grepl("uniprot", analytes)]

  if(length(uniprotIds) == 0) {
    print("There are no uniprot accessions in the input. Skipping protein to metabolite query step.")
    return(data.frame())
  }

  rxns <- getReactionsForAnalytes(db=db, analytes=uniprotIds, humanProtein = humanProteins, includeTransportRxns = T)

  if(nrow(rxns$prot2rxn) > 0) {
    reactions <- unique(unlist(rxns$prot2rxn$reactionId))
    rxnParticipants <- getReactionParticipants(db=db, reactionList = reactions)
    rxnParticipants <- rxnParticipants[rxnParticipants$participantRole %in% c("substrate", "product", "cofactor"),]

    rxns <- rxns$prot2rxn[,c(2, 3, 4, 5,6, 2, 1, 8)]

    result <- merge(x=rxns, y=rxnParticipants, by.x="reactionId", by.y="reactionId", all.x=T, all.y=T)

    result <- result[,-ncol(result)]

    if(!includeRheaRxnDetails) {
      keeperCols = c('uniprot', 'proteinName', 'participantName', 'participantId')
      result <- result[, keeperCols]
      result$relation <- "protein2met"
      result <- result[,c(5,1,2,4,3)]
      colnames(result) <- c("query_relation", "input_analyte", "input_common_names", "rxn_partner_ids", "rxn_partner_common_name")
      result <- unique(result)
    } else {
      result$relation <- "protein2met"
      result$partnerRole <- "metabolite"
      result <- result[,c(13,2,3,11,12,14,10,1,9,5)]

      colnames(result) <- c("relation","inputId", "inputCommonName", "reactionPartnerId", "reactionPartnerCommonName", "partnerRole", "substrateProductCofactor", "reactionId", "reactionType", "rxnEquation")

      # keep substrateProductCofator descriptive, not integer flag...
      # result$substrateProductFlag[result$substrateProductFlag == 'substrate'] <- 0
      # result$substrateProductFlag[result$substrateProductFlag == 'product'] <- 1
      # result$substrateProductFlag[result$substrateProductFlag == 'cofactor'] <- -1
      result <- result[order(result$inputId, result$reactionPartnerCommonName, result$substrateProductCofactor),]
      result <- unique(result)
    }

  } else {
    print("None of the input ids mapped to reactions in RaMP. Empty result.")
    result = data.frame()
  }
  return(result)
}



 #' Utility method to combine two list to tally unique counts and combine lists into a string.
 #' This utility script specifically supports accounting in reaction class queries.
 combineStringLists <- function(x, y, sep=",") {
   reactions <- c()
   rxn_count <- c()
   for(i in 1:length(x)) {
     if(x[i] != "") {
       data <- paste0(x[i], ",", y[i])
     } else {
       data <- y[i]
     }
     dataSplit <- strsplit(data, split=sep)
     dataSplit <- unlist(unique(dataSplit))

     size = length(dataSplit)
     data <- paste0(dataSplit, collapse=", ")
     reactions <- c(reactions, data)
     rxn_count <- c(rxn_count, size)
   }
   return(data.frame(rxn_count, reactions))
 }

#' getRampSourceInfoFromAnalyteIDs Utility method to extract source table information from analyte ids
#'
#' @param db RaMP Database object
#' @param analytes list of analyte ids
#' @return returns a dataframe of ramp analyte source information
#'
getRampSourceInfoFromAnalyteIDs <- function(db = RaMP(), analytes) {

  analyteStr <- listToQueryString(analytes)

  query = paste("select distinct sourceId, rampId, geneOrCompound from source where sourceId in (",analyteStr,")")

  df <- RaMP::runQuery(query, db)

  return(df)
}



#' Utility method that evalutates the mapping counts for analytes to reaction ids
#'
#' @param analyte2Rxn result object from getReactionsForAnalytes
#' @param minRxnParticipantFilter if > 1, the set of reactions is reduced to those with having this number of mapped analytes
#'
getReactionParticpantCounts <- function(analyte2Rxn, minRxnParticipantCountFilter=1) {

  metCounts <- data.frame(table(unlist(analyte2Rxn$met2rx$reactionId)))
  proteinCounts <- data.frame(table(unlist(analyte2Rxn$protein2rxn$reactionId)))

  # need to assess status, do we have mets and proteins to reactions, one or the other
  if(nrow(metCounts) > 0 && nrow(proteinCounts) > 0) {

    mergedCounts <- merge(metCounts, proteinCounts, by.x = 'Var1', by.y='Var1', all.x=T, all.y=T)
    mergedCounts$Freq.x[is.na(mergedCounts$Freq.x)] <- 0
    mergedCounts$Freq.y[is.na(mergedCounts$Freq.y)] <- 0
    mergedCounts$partCount <- mergedCounts$Freq.x + mergeCounts$Freq.y
    mergedCounts <- mergedCounts[,c(1,4)]

  } else if(nrow(metCounts) > 0) {
    mergedCounts <- metCounts
  } else if(nrow(proteinCounts) > 0) {
    mergedCounts <- proteinCounts
  } else {
    # return an empty dataframe if there are not reaction mappings
    result <- list()
    result[['merged_rxn2analyte_count']] <- data.frame()
    result[['total_mapped_rxns']] <- 0
    result[['total_rxns_retained']] <- 0
    return(result)
  }

  colnames(mergedCounts) <- c('rxn_source_id', 'partCount')
  filteredMergedCounts <- mergedCounts[mergedCounts$partCount >= minRxnParticipantCountFilter,]

  totalRxns <- nrow(mergedCounts)
  totalRxnReatained <- nrow(filteredMergedCounts)

  result <- list()
  result[['merged_rxn2analyte_count']] <- filteredMergedCounts
  result[['total_mapped_rxns']] <- totalRxns
  result[['total_rxns_retained']] <- totalRxnReatained

  return(result)
}

#' Utility method that returns the reaction source ids for a given result from getReactionsForAnalytes
#'
#' @param reactions input result from the getReactionsForAnalytes
#'
getReactionSourceIdsFromReactionQuery <- function(reactions) {
  rxns <- c()
  if(nrow(reactions$met2rxn) > 0) {
    rxns <- c(rxns, unlist(reactions$met2rxn$rxn_source_id))
  }
  if(nrow(reactions$prot2rxn) > 0) {
    rxns <- c(rxns, unlist(reactions$prot2rxn$rxn_source_id))
  }
  return(unique(rxns))
}


#' Utility method that returns the number of rhea reaction count in each reaction class.
#'
#' @param reactions input result from the getReactionsForAnalytes
#' @param humanProtein boolean value indicating if reactions should be constrained to those having human proteins
#'
getReactionClassStats <- function(db=RaMP(), humanProtein=T) {

  humanProteinArg = ''
  if(humanProtein) {
    humanProteinArg = ' and r.has_human_prot = 1'
  }

  sql = paste0("select rc.rxn_class, rc.rxn_class_hierarchy, rc.ec_level as stat_ec_level, rc.rxn_class_ec as stat_class_ec, count(distinct(rc.rxn_source_id)) as Total_Rxns_in_Class
  from reaction_ec_class rc, reaction r
  where rc.rxn_source_id = r.rxn_source_id ",
  humanProteinArg,
  " group by rxn_class, ec_level, rxn_class_ec")

  result <- runQuery(sql=sql, db=db)

  return(result)
}

#' Utility method that returns the number of rhea reaction count in each reaction class.
#'
#' @param db RaMP Object
#' @param humanProtein boolean value indicating if reactions should be constrained to those having human proteins
#'
getReactionClassStatsOnAnalytes <- function(db=RaMP(), humanProtein=T) {

  humanProteinArg = ''
  if(humanProtein) {
    humanProteinArg = ' and r.has_human_prot = 1'
  }

  sql = paste0("select rc.rxn_class, rc.rxn_class_hierarchy, rc.ec_level as stat_ec_level, rc.rxn_class_ec as stat_class_ec, count(distinct(rm.met_source_id)) as Total_in_RxnClass_Metab
  from reaction_ec_class rc, reaction2met rm, reaction r
  where rm.rxn_source_id = rc.rxn_source_id
  and rc.rxn_source_id = r.rxn_source_id ",
  humanProteinArg
  , " group by rc.rxn_class, rc.ec_level, rc.rxn_class_ec")

  metRes <- runQuery(sql=sql, db=db)

  sql = paste0("select rc.rxn_class, rc.rxn_class_hierarchy, rc.ec_level as stat_ec_level, rc.rxn_class_ec as stat_class_ec,  count(distinct(rp.uniprot)) as Total_in_RxnClass_Protein
  from reaction_ec_class rc, reaction2protein rp, reaction r
  where rp.rxn_source_id = rc.rxn_source_id
  and rc.rxn_source_id = r.rxn_source_id ",
  humanProteinArg,
  " group by rc.rxn_class, rc.ec_level, rc.rxn_class_ec")

  protRes <- runQuery(sql=sql, db=db)

  return(list(metStats=metRes, protStats=protRes))
}

#' Utility method that returns rhea reaction urls
#'
#' @param reactionList list of prefixed (rhea:) reaction ids
#'
getReactionRheaURLs <- function(reactionList) {
  baseURL = "https://www.rhea-db.org/"
  reactionList <- gsub(":","/",reactionList)
  urls <- paste0(baseURL, reactionList)
  return(urls)
}


checkReactionInputIds <- function(callingFunction, idList) {
  idCount = length(idList)

  metIds = idList[grepl("chebi:", idList)]
  proteinIds = idList[grepl("uniprot:", idList)]

  chebiCount <- length(metIds)
  uniprotCount <- length(proteinIds)

  message("Reporting Function: ", callingFunction)
  message("The input list has ",idCount," IDs.")
  message("The input list has ",chebiCount," chebi IDs.")
  message("The input list has ",uniprotCount," uniprot IDs.")
  invalidCount = idCount - (chebiCount + uniprotCount)
  if(invalidCount > 0) {
    message(invalidCount, " IDs will not be analyzed.")
    message("Reaction queries support ChEBI IDs, prefixed with 'chebi:'")
    message("and uniprot IDs, prefixed with 'uniprot:'")
  }
  ids <- list(metIds = metIds, proteinIds = proteinIds)
  return(ids)
}
ncats/RaMP-DB documentation built on April 28, 2024, 3:28 a.m.