R/kegg_get.R

Defines functions .kegg_kgml_get_graph .kegg_kgml_get_maps .kegg_kgml_get_reactions .kegg_kgml_exp_rct_rel .kegg_kgml_exp_cpd_rel .kegg_kgml_exp_grp_rel .kegg_kgml_get_relations .kegg_kgml_get_groups .kegg_kgml_get_entries parse_kgml_fusion parse_kgml_chemical parse_kgml_regulatory kgml2network kegg_get_pathways .kegg_get

# super function

.kegg_get <- function(...) {

    # find the 'db' argument

    kid <- pmatch('kid', ...names())

    # find the option argument if present

    option <- pmatch('option', ...names())

    # check if kid and/or options are defined

    if(is.na(kid)) {
        message('\U274C Argument `db OR/AND option`kid`` is required!')
        stop()
    }

    # find the value of kid

    kid <- ...elt(kid)

    # find the value of option if present

    if(!is.na(option)) {
        option <- ...elt(option)
    }

    # TODO add kid/option tests logic

    # form query

    query <- sprintf('get/%s/%s', kid, option)

    # send request to KEGG API

    resp      <- .kegg_rest(query)

    # Check the content type

    cont_type <- .kegg_resp_content_type(resp)

    # read content of response

    cont <- httr::content(
        x        = resp,
        as       = 'text',
        type     = cont_type[1],
        encoding = cont_type[2]
    )

    # apply middleware

    # return results

    return(cont)
}

# middleware functions

kegg_get_pathways <- function(kid) {

    # TODO add test, so it lists only pathways, nothing more
    pathways <- .kegg_list(db = kid)

    xmll <- lapply(
        pathways$kid,
        \(kid) {
            .kegg_get(kid = kid, option = 'kgml')
        }
    )

    xmll <- setNames(xmll, pathways$pathname)

    nodes <- lapply(
        xmll,
        \(xml) {
            .kegg_kgml_get_entries(xml) |>
                dplyr::mutate(
                    label = paste(type, id, sep = ': '),
                    group = type,
                    shape = dplyr::case_when(
                        type == 'gene' ~ 'square',
                        type == 'compound' ~ 'triangle',
                        type == 'map' ~ 'database',
                        TRUE ~ 'diamond'
                    ),
                    title = sprintf("<p><b>%s</b></p>", kid),
                    shadow = TRUE
                )
        }
    ) |>
        data.table::rbindlist(fill = TRUE)

    edges <- lapply(
        xmll,
        \(xml) {
            .kegg_kgml_exp_rct_rel(x) |>
                dplyr::mutate(
                    label = byname,
                    dashes = (byname == 'Group'),
                    title = paste(byname, byval),
                    smooth = TRUE,
                    shadow = TRUE
                )
        }
    )
}

kgml2network <- function(type, ...) {

    res <- switch(
        type,
        regulatory = parse_kgml_regulatory(),
        chemical   = parse_kgml_chemical(),
        fusion     = parse_kgml_fusion()
    )

    if(is.null(res)) {
        stop('Invalid type provided!')
    }

    return(res)
}

parse_kgml_regulatory <- function() {
    return('regulatory')
}

parse_kgml_chemical <- function() {
    return('chemical')
}

parse_kgml_fusion <- function() {
    return('fusion')
}

.kegg_kgml_get_entries <- function(x,...) {
    # read xml
    xml <- xml2::read_xml(x)
    # read ./entry nodes
    ent <- xml2::xml_find_all(xml, './entry')
    # create an entry table
    ent <- data.table::rbindlist(
        lapply(
            ent,
            \(node) {
                as.list(xml2::xml_attrs(node))
            }
        ),
        fill = TRUE
    ) |>
        dplyr::mutate(kid = stringr::str_split(name, '\\s+'), name = NULL) |>
        dplyr::select(any_of(c('id', 'kid', 'type', 'reaction')))
    # check if there any entries with type == 'group' attribute
    # and if yes, read groups and expand them into entries
    if(any(ent$type == 'group')) {
        message('Expanding the groups in a node set...')
        grp <- .kegg_kgml_get_groups(x)

    }

    return(ent)
}

.kegg_kgml_get_groups <- function(x,...) {
    xml <- xml2::read_xml(x)
    ent <- xml2::xml_find_all(xml, './entry')
    ent <- ent[xml2::xml_attr(ent, 'type') %in% c('group')]

    data.table::rbindlist(
        lapply(
            ent,
            \(node) {
                gratrs <- as.list(xml2::xml_attrs(node))
                cpatrs <- xml2::xml_attr(xml2::xml_find_all(node, 'component'), 'id')
                cpatrs <- list(outerid = sapply(cpatrs, c))
                c(gratrs, cpatrs)
            }
        )
    )
}

.kegg_kgml_get_relations <- function(x,...) {

    xml <- xml2::read_xml(x)
    ent <- xml2::xml_find_all(xml, './relation')

    if(!length(ent)) {
        message('No relations found...')
        invisible(return(data.frame()))
    }

    rel <- data.table::rbindlist(
        lapply(
            ent,
            \(node) {
                sbt = xml2::xml_find_all(node, 'subtype')

                sbt = data.frame(
                    byname = xml2::xml_attr(sbt, 'name'),
                    byval  = xml2::xml_attr(sbt, 'value')
                )

                if(!nrow(sbt)) {
                    sbt = data.frame(byname = 'unknown', byval = NA)
                }

                rel = data.frame(
                    from  = xml2::xml_attr(node, 'entry1'),
                    to    = xml2::xml_attr(node, 'entry2'),
                    type  = xml2::xml_attr(node, 'type')
                )

                rel = cbind(rel, sbt)

                rel = .kegg_kgml_exp_cpd_rel(rel)

                return(rel)

            }
        )
    )

    return(rel)
}

.kegg_kgml_exp_grp_rel <- function(x) {

    # define groups and relations
    grp <- .kegg_kgml_get_groups(x)
    rel <- .kegg_kgml_get_relations(x)

    # return relations if group is empty
    if(!nrow(grp)) {
        message('No group-nodes found, returning the original dataset...')
        return(rel)
    }

    # complete graph for groups
    cmplgrph <- data.table::rbindlist(
        lapply(
            split(grp, grp$id),
            \(x) {
                expand.grid(
                    from = x$outerid,
                    to   = x$outerid
                ) |>
                    dplyr::mutate(type = 'Group', byname = 'group', byval = '') |>
                    dplyr::filter(from != to)
            }
        )
    )

    # replace `from` var in reldt if present
    reldt |>
        dplyr::filter((from %in% grp$id) | (to %in% grp$id)) |>
        dplyr::left_join(dplyr::select(grp, id, outerid), by = c('from' = 'id')) |>
        dplyr::mutate(from = ifelse(!is.na(outerid), outerid, from), outerid = NULL) |>
        dplyr::left_join(dplyr::select(grp, id, outerid), by = c('to' = 'id')) |>
        dplyr::mutate(to = ifelse(!is.na(outerid), outerid, to), outerid = NULL) |>
        dplyr::bind_rows(cmplgrph, rel) |>
        dplyr::filter(! from %in% grp$id & ! to %in% grp$id)
}

.kegg_kgml_exp_cpd_rel <- function(x) {

    if(! 'byname' %in% colnames(x)) {
        return(x)
    } else if(any((x$type %in% c('ECrel', 'maplink') & (x$byname == 'compound')))) {
        # filter all what does not belong to 'ECrel' or 'maplink'
        tmp <- dplyr::filter(x, type %in% c('ECrel', 'maplink'), byname == 'compound', !is.na(byval))
        # antijoin x, so it is a complement to tmp
        dplyr::bind_rows(
            dplyr::mutate(tmp, to   = byval, byname = 'product',   byval = 'product of'),
            dplyr::mutate(tmp, from = byval, byname = 'substrate', byval = 'substrate of')
        ) |>
            dplyr::bind_rows(
                dplyr::anti_join(x, tmp, by = colnames(x))
            )
    }
}

.kegg_kgml_exp_rct_rel <- function(x) {

    rct <- .kegg_kgml_get_reactions(x)
    ent <- .kegg_kgml_get_entries(x)
    rel <- .kegg_kgml_get_relations(x)

    if(!'reaction' %in% colnames(ent)) {
        return(rel)
    }

    tmp <- ent |>
        dplyr::rename(ent = kid, ent_type = type, ent_id = id, id = reaction) |>
        dplyr::right_join(rct, by = c('id' = 'kid')) |>
        dplyr::select(ent_id, rct_id = id, reaction = type, product, substrate) |>
        tidyr::pivot_longer(cols = c(product, substrate), names_to = 'rct_entries', values_to = 'rct_vals') |>
        dplyr::mutate(
            cpd = ifelse(rct_entries == 'product', 'to', 'from'),
            ent  = ifelse(cpd == 'to', 'from', 'to')
        ) |>
        dplyr::transmute(
            byname   = rct_id,
            byval    = reaction,
            type     = 'reaction',
            id       = purrr::map2(ent_id, rct_vals, .f = c),
            fromto   = purrr::map2(ent, cpd, .f = c)
        ) |>
        tidyr::unnest(cols = c('id', 'fromto')) |>
        tidyr::pivot_wider(names_from = 'fromto', values_from = 'id') |>
        tidyr::unnest(cols = c('from', 'to')) |>
        dplyr::select(from, to, byname, byval)


    dplyr::bind_rows(
        rel,
        tmp
    ) |>
        unique()

}

.kegg_kgml_get_reactions <- function(x,...) {

    xml <- xml2::read_xml(x)

    rct <- xml2::xml_find_all(xml, './reaction')

    if(!length(rct)) { # change because if not empty returns vector
        message('Reaction set is empty!')
        return(NULL)
    }

    sbs <- lapply(
        rct,
        \(node) {
            ids <- xml2::xml_attr(node, 'id')
            lst <- xml2::xml_attrs(xml2::xml_find_all(node, 'substrate'))
            lst <- lapply(lst, as.list)
            lst <- data.table::rbindlist(lst)
            lst$reaction_id = ids
            lst
        }
    ) |>
        data.table::rbindlist(fill = TRUE) |>
        dplyr::rename(substrate = id, kid_substrate = name, id = reaction_id) |>
        dplyr::select(id, substrate, kid_substrate)

    prd <- lapply(
        rct,
        \(node) {
            ids <- xml2::xml_attr(node, 'id')
            lst <- xml2::xml_attrs(xml2::xml_find_all(node, 'product'))
            lst <- lapply(lst, as.list)
            lst <- data.table::rbindlist(lst)
            lst$reaction_id = ids
            lst
        }
    ) |>
        data.table::rbindlist(fill = TRUE) |>
        dplyr::rename(product = id, kid_product = name, id = reaction_id) |>
        dplyr::select(id, product, kid_product)

    rct <- data.table::rbindlist(
        lapply(
            rct,
            \(node) {
                as.list(xml2::xml_attrs(node))
            }
        ),
        fill = TRUE
    ) |>
        dplyr::rename(kid = name) |>
        dplyr::left_join(
            dplyr::full_join(prd, sbs, by = 'id'),
            by = 'id'
        ) |>
        dplyr::select(-id)

    return(rct)
}

.kegg_kgml_get_maps <- function(x, ...) {
    xml <- xml2::read_xml(x)
    ent <- xml2::xml_find_all(xml, './entry')
    ent <- ent[xml2::xml_attr(ent, 'type') == 'map']

    data.table::rbindlist(
        lapply(
            ent,
            \(node) { as.list(xml2::xml_attrs(node)) }
        )
    ) |>
        dplyr::select(id, kid = name, type)
}

.kegg_kgml_get_graph <- function(x, ...) {

}
utubun/keggr documentation built on Jan. 29, 2022, 5:08 a.m.