R/kegg_list.R

Defines functions .kegg_parse_organism .kegg_parse_dgroup .kegg_parse_drug .kegg_parse_disease .kegg_parse_variant .kegg_parse_network .kegg_parse_enzyme .kegg_parse_rclass .kegg_parse_reaction .kegg_parse_glycan .kegg_parse_compound .kegg_parse_genome .kegg_parse_ko .kegg_parse_module .kegg_parse_brite .kegg_parse_pathway .kegg_list kegg_parse_list kegg_list

# superfunction

# superfunction

kegg_list <- function(db, org = NA, option = NA) {

    if (!is.na(org) & db == 'pathway') {
        url <- sprintf('http://rest.kegg.jp/list/%s/%s', db, org)
    } else if (db == 'brite' & (!is.na(option) & option == 'xl')) {
        url <- sprintf('http://rest.kegg.jp/list/%s/%s', db, option)
    } else {
        url <- sprintf('http://rest.kegg.jp/list/%s', db)
    }

    resp <- httr::GET(url)

    if (.kegg_check_response(resp)) {
        return(kegg_parse_list(resp, cnames = c('kid', 'dscr')))
        return(resp)
    }

    stop(httr::http_status(resp)$message)
}

# middleware

kegg_parse_list <- function(response, cnames = NULL) {
    txt <- httr::content(response, encoding = 'utf-8')
    res <- read.delim(text = txt, header = F, sep = '\t', strip.white = TRUE)
    if(length(cnames) == ncol(res)) {
        colnames(res) <- cnames
    } else if (db == 'organism') {
        colnames(res) <- c('taxid', 'kid', 'orgname', 'descr')
    }
    return(res)
}

# old staff


.kegg_list <- function(...) {

    # find the 'db' argument

    db <- pmatch('db', ...names())

    # find the option argument if present

    option <- pmatch('option', ...names())

    # check if the db | option are defined

    if(is.na(db) & is.na(option)) {
        message('\U274C Argument db OR/AND option is required!')
        stop()
    }

    # find the value of db if present

    if(!is.na(db)) {
        db <- ...elt(db)
    }

    # find the value of option if present

    if(!is.na(option)) {
        option <- ...elt(option)
    }

    # verify if db or option are in the list of allowed databases

    if(.kegg_verify_db(verb = 'list', db = ifelse(!is.na(db), db, option))) {

        # if database is brite, check if option 'xl' is present, ignore the rest

        if(db == 'brite' & option == 'xl') {
            query <- sprintf('list/%s/%s', db, option)
        } else if(grepl('(^[a-z]{3,4}$)|(^T\\d{5}$)', db)) {
            option <- db
            db     <- 'pathway' # wrong should return a list of genes
            query  <- sprintf('list/%s/%s', db, option)
        } else if(grepl('(^[a-z]{3,4}$)|(^T\\d{5}$)', option)) {
            db    <- 'pathway'
            query <- sprintf('list/pathway/%s', option)
        } else {
            query     <- sprintf('list/%s', db)
        }

        resp      <- .kegg_rest(query)
        cont_type <- .kegg_resp_content_type(resp)

        cont <- httr::content(
            x        = resp,
            as       = 'text',
            type     = cont_type[1],
            encoding = cont_type[2]
        )

        # apply middleware functions

        res <- switch(
            db,
            pathway   = .kegg_parse_pathway(cont),
            brite     = .kegg_parse_brite(cont),
            module    = .kegg_parse_module(cont),
            orthology = .kegg_parse_ko(cont),
            genome    = .kegg_parse_genome(cont),
            compound  = .kegg_parse_compound(cont),
            glycan    = .kegg_parse_glycan(cont),
            reaction  = .kegg_parse_reaction(cont),
            rclass    = .kegg_parse_rclass(cont),
            enzyme    = .kegg_parse_enzyme(cont),
            network   = .kegg_parse_network(cont),
            variant   = .kegg_parse_variant(cont),
            disease   = .kegg_parse_disease(cont),
            drug      = .kegg_parse_drug(cont),
            dgroup    = .kegg_parse_dgroup(cont),
            organism  = .kegg_parse_organism(cont)
        )

        return(res)

    }

    message(sprintf('\U274C Invalid query: %s/list/%s!', BASEURL, db))
    stop()
}

# middlewear functions

.kegg_parse_pathway <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'pathname'))

}

.kegg_parse_brite <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'hierarchy'))
}

.kegg_parse_module <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'description'))
}

.kegg_parse_ko <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'orthologs'))
}

.kegg_parse_genome <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'org')) |>
        tidyr::separate(org, into = c('orgkid', 'orgname'), sep = ';\\s+', extra = 'drop')
}

.kegg_parse_compound <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'compound'))
}

.kegg_parse_glycan <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'name'))
}

.kegg_parse_reaction <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'rct')) |>
        tidyr::separate(rct, into = c('enzyme', 'reaction'), sep = ';\\s+')
}

.kegg_parse_rclass <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'rclass'))
}

.kegg_parse_enzyme <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'name'))
}

.kegg_parse_network <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'name'))
}

.kegg_parse_variant <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'description'))
}

.kegg_parse_disease <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'disease'))
}

.kegg_parse_drug <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'name'))
}

.kegg_parse_dgroup <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('kid', 'name'))
}

.kegg_parse_organism <- function(cont) {

    read.delim(text = cont, header = FALSE, stringsAsFactors = FALSE) |>
        `colnames<-`(value = c('taxid', 'kid', 'orgname', 'taxonomy'))
}
utubun/keggr documentation built on Jan. 29, 2022, 5:08 a.m.