# super function
.kegg_get <- function(...) {
# find the 'db' argument
kid <- pmatch('kid', ...names())
# find the option argument if present
option <- pmatch('option', ...names())
# check if kid and/or options are defined
if(is.na(kid)) {
message('\U274C Argument `db OR/AND option`kid`` is required!')
stop()
}
# find the value of kid
kid <- ...elt(kid)
# find the value of option if present
if(!is.na(option)) {
option <- ...elt(option)
}
# TODO add kid/option tests logic
# form query
query <- sprintf('get/%s/%s', kid, option)
# send request to KEGG API
resp <- .kegg_rest(query)
# Check the content type
cont_type <- .kegg_resp_content_type(resp)
# read content of response
cont <- httr::content(
x = resp,
as = 'text',
type = cont_type[1],
encoding = cont_type[2]
)
# apply middleware
# return results
return(cont)
}
# middleware functions
kegg_get_pathways <- function(kid) {
# TODO add test, so it lists only pathways, nothing more
pathways <- .kegg_list(db = kid)
xmll <- lapply(
pathways$kid,
\(kid) {
.kegg_get(kid = kid, option = 'kgml')
}
)
xmll <- setNames(xmll, pathways$pathname)
nodes <- lapply(
xmll,
\(xml) {
.kegg_kgml_get_entries(xml) |>
dplyr::mutate(
label = paste(type, id, sep = ': '),
group = type,
shape = dplyr::case_when(
type == 'gene' ~ 'square',
type == 'compound' ~ 'triangle',
type == 'map' ~ 'database',
TRUE ~ 'diamond'
),
title = sprintf("<p><b>%s</b></p>", kid),
shadow = TRUE
)
}
) |>
data.table::rbindlist(fill = TRUE)
edges <- lapply(
xmll,
\(xml) {
.kegg_kgml_exp_rct_rel(x) |>
dplyr::mutate(
label = byname,
dashes = (byname == 'Group'),
title = paste(byname, byval),
smooth = TRUE,
shadow = TRUE
)
}
)
}
kgml2network <- function(type, ...) {
res <- switch(
type,
regulatory = parse_kgml_regulatory(),
chemical = parse_kgml_chemical(),
fusion = parse_kgml_fusion()
)
if(is.null(res)) {
stop('Invalid type provided!')
}
return(res)
}
parse_kgml_regulatory <- function() {
return('regulatory')
}
parse_kgml_chemical <- function() {
return('chemical')
}
parse_kgml_fusion <- function() {
return('fusion')
}
.kegg_kgml_get_entries <- function(x,...) {
# read xml
xml <- xml2::read_xml(x)
# read ./entry nodes
ent <- xml2::xml_find_all(xml, './entry')
# create an entry table
ent <- data.table::rbindlist(
lapply(
ent,
\(node) {
as.list(xml2::xml_attrs(node))
}
),
fill = TRUE
) |>
dplyr::mutate(kid = stringr::str_split(name, '\\s+'), name = NULL) |>
dplyr::select(any_of(c('id', 'kid', 'type', 'reaction')))
# check if there any entries with type == 'group' attribute
# and if yes, read groups and expand them into entries
if(any(ent$type == 'group')) {
message('Expanding the groups in a node set...')
grp <- .kegg_kgml_get_groups(x)
}
return(ent)
}
.kegg_kgml_get_groups <- function(x,...) {
xml <- xml2::read_xml(x)
ent <- xml2::xml_find_all(xml, './entry')
ent <- ent[xml2::xml_attr(ent, 'type') %in% c('group')]
data.table::rbindlist(
lapply(
ent,
\(node) {
gratrs <- as.list(xml2::xml_attrs(node))
cpatrs <- xml2::xml_attr(xml2::xml_find_all(node, 'component'), 'id')
cpatrs <- list(outerid = sapply(cpatrs, c))
c(gratrs, cpatrs)
}
)
)
}
.kegg_kgml_get_relations <- function(x,...) {
xml <- xml2::read_xml(x)
ent <- xml2::xml_find_all(xml, './relation')
if(!length(ent)) {
message('No relations found...')
invisible(return(data.frame()))
}
rel <- data.table::rbindlist(
lapply(
ent,
\(node) {
sbt = xml2::xml_find_all(node, 'subtype')
sbt = data.frame(
byname = xml2::xml_attr(sbt, 'name'),
byval = xml2::xml_attr(sbt, 'value')
)
if(!nrow(sbt)) {
sbt = data.frame(byname = 'unknown', byval = NA)
}
rel = data.frame(
from = xml2::xml_attr(node, 'entry1'),
to = xml2::xml_attr(node, 'entry2'),
type = xml2::xml_attr(node, 'type')
)
rel = cbind(rel, sbt)
rel = .kegg_kgml_exp_cpd_rel(rel)
return(rel)
}
)
)
return(rel)
}
.kegg_kgml_exp_grp_rel <- function(x) {
# define groups and relations
grp <- .kegg_kgml_get_groups(x)
rel <- .kegg_kgml_get_relations(x)
# return relations if group is empty
if(!nrow(grp)) {
message('No group-nodes found, returning the original dataset...')
return(rel)
}
# complete graph for groups
cmplgrph <- data.table::rbindlist(
lapply(
split(grp, grp$id),
\(x) {
expand.grid(
from = x$outerid,
to = x$outerid
) |>
dplyr::mutate(type = 'Group', byname = 'group', byval = '') |>
dplyr::filter(from != to)
}
)
)
# replace `from` var in reldt if present
reldt |>
dplyr::filter((from %in% grp$id) | (to %in% grp$id)) |>
dplyr::left_join(dplyr::select(grp, id, outerid), by = c('from' = 'id')) |>
dplyr::mutate(from = ifelse(!is.na(outerid), outerid, from), outerid = NULL) |>
dplyr::left_join(dplyr::select(grp, id, outerid), by = c('to' = 'id')) |>
dplyr::mutate(to = ifelse(!is.na(outerid), outerid, to), outerid = NULL) |>
dplyr::bind_rows(cmplgrph, rel) |>
dplyr::filter(! from %in% grp$id & ! to %in% grp$id)
}
.kegg_kgml_exp_cpd_rel <- function(x) {
if(! 'byname' %in% colnames(x)) {
return(x)
} else if(any((x$type %in% c('ECrel', 'maplink') & (x$byname == 'compound')))) {
# filter all what does not belong to 'ECrel' or 'maplink'
tmp <- dplyr::filter(x, type %in% c('ECrel', 'maplink'), byname == 'compound', !is.na(byval))
# antijoin x, so it is a complement to tmp
dplyr::bind_rows(
dplyr::mutate(tmp, to = byval, byname = 'product', byval = 'product of'),
dplyr::mutate(tmp, from = byval, byname = 'substrate', byval = 'substrate of')
) |>
dplyr::bind_rows(
dplyr::anti_join(x, tmp, by = colnames(x))
)
}
}
.kegg_kgml_exp_rct_rel <- function(x) {
rct <- .kegg_kgml_get_reactions(x)
ent <- .kegg_kgml_get_entries(x)
rel <- .kegg_kgml_get_relations(x)
if(!'reaction' %in% colnames(ent)) {
return(rel)
}
tmp <- ent |>
dplyr::rename(ent = kid, ent_type = type, ent_id = id, id = reaction) |>
dplyr::right_join(rct, by = c('id' = 'kid')) |>
dplyr::select(ent_id, rct_id = id, reaction = type, product, substrate) |>
tidyr::pivot_longer(cols = c(product, substrate), names_to = 'rct_entries', values_to = 'rct_vals') |>
dplyr::mutate(
cpd = ifelse(rct_entries == 'product', 'to', 'from'),
ent = ifelse(cpd == 'to', 'from', 'to')
) |>
dplyr::transmute(
byname = rct_id,
byval = reaction,
type = 'reaction',
id = purrr::map2(ent_id, rct_vals, .f = c),
fromto = purrr::map2(ent, cpd, .f = c)
) |>
tidyr::unnest(cols = c('id', 'fromto')) |>
tidyr::pivot_wider(names_from = 'fromto', values_from = 'id') |>
tidyr::unnest(cols = c('from', 'to')) |>
dplyr::select(from, to, byname, byval)
dplyr::bind_rows(
rel,
tmp
) |>
unique()
}
.kegg_kgml_get_reactions <- function(x,...) {
xml <- xml2::read_xml(x)
rct <- xml2::xml_find_all(xml, './reaction')
if(!length(rct)) { # change because if not empty returns vector
message('Reaction set is empty!')
return(NULL)
}
sbs <- lapply(
rct,
\(node) {
ids <- xml2::xml_attr(node, 'id')
lst <- xml2::xml_attrs(xml2::xml_find_all(node, 'substrate'))
lst <- lapply(lst, as.list)
lst <- data.table::rbindlist(lst)
lst$reaction_id = ids
lst
}
) |>
data.table::rbindlist(fill = TRUE) |>
dplyr::rename(substrate = id, kid_substrate = name, id = reaction_id) |>
dplyr::select(id, substrate, kid_substrate)
prd <- lapply(
rct,
\(node) {
ids <- xml2::xml_attr(node, 'id')
lst <- xml2::xml_attrs(xml2::xml_find_all(node, 'product'))
lst <- lapply(lst, as.list)
lst <- data.table::rbindlist(lst)
lst$reaction_id = ids
lst
}
) |>
data.table::rbindlist(fill = TRUE) |>
dplyr::rename(product = id, kid_product = name, id = reaction_id) |>
dplyr::select(id, product, kid_product)
rct <- data.table::rbindlist(
lapply(
rct,
\(node) {
as.list(xml2::xml_attrs(node))
}
),
fill = TRUE
) |>
dplyr::rename(kid = name) |>
dplyr::left_join(
dplyr::full_join(prd, sbs, by = 'id'),
by = 'id'
) |>
dplyr::select(-id)
return(rct)
}
.kegg_kgml_get_maps <- function(x, ...) {
xml <- xml2::read_xml(x)
ent <- xml2::xml_find_all(xml, './entry')
ent <- ent[xml2::xml_attr(ent, 'type') == 'map']
data.table::rbindlist(
lapply(
ent,
\(node) { as.list(xml2::xml_attrs(node)) }
)
) |>
dplyr::select(id, kid = name, type)
}
.kegg_kgml_get_graph <- function(x, ...) {
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.