# examples/rest_kegg.R

library(httr)
library(xml2)
library(RSQLite)
library(visNetwork)
library(igraph)

# Load every helper script found in ./R into the current session.
# The pattern is anchored with `$` so that only names ending in ".R" are
# sourced; an unanchored pattern would also match names such as "notes.Rmd"
# or "cache.RData", which `source()` cannot evaluate.
# `lapply()` is used purely for its side effect, hence `invisible()`.
invisible(lapply(
    list.files(path = './R', pattern = '\\.R$', all.files = TRUE, full.names = TRUE),
    source
))

# Organisms known to KEGG

org <- .kegg_list(db = 'organism')

# show only the Pseudomonas aeruginosa rows
org[grep('Pseudomonas aeruginosa.*', org$orgname), ]

# NCBI GeneID mappings for the two strains used in this example:
# 'pae' (PAO1) and 'pau' (PA14)

pae <- kegg_conv('pae', 'ncbi-geneid')
pau <- kegg_conv('ncbi-geneid', 'pau')

# row counts of the two mapping tables
nrow(pae)
nrow(pau)

# List all PAO1 genes (KEGG organism code 'pae')

genes <- kegg_list(db = 'pae')

# Convert between KEGG, UniProt and NCBI identifiers.
# In each lookup table the `pae` column is renamed to `kid`, and the table is
# then inner-joined to the gene list on that column.

ncbi_geneid <- kegg_conv('pae', 'ncbi-geneid') |>
    rename(kid = pae) |>
    inner_join(genes, by = 'kid')

ncbi_protid <- kegg_conv('pae', 'ncbi-proteinid') |>
    rename(kid = pae) |>
    inner_join(genes, by = 'kid')

uniprot <- kegg_conv('pae', 'uniprot') |>
    rename(kid = pae) |>
    inner_join(genes, by = 'kid')

# Bring all info for the genes together.
# `dscr` is dropped from each lookup table before joining, so the only
# description column in the result is the one already present in `genes`.

genes <- genes |>
    inner_join(select(ncbi_geneid, -dscr), by = 'kid') |>
    inner_join(select(ncbi_protid, -dscr), by = 'kid') |>
    inner_join(select(uniprot, -dscr), by = 'kid') |>
    # Strip the "db:" prefix from the identifier columns only. The pattern
    # keeps just the text after the last colon, so applying it to the
    # free-text `dscr` column would truncate any description that contains
    # a colon.
    mutate(across(-dscr, \(x) gsub('^.+:(.+)$', '\\1', x)))

# Clean and save as json
#
# Within each `kid` group every column is rebuilt as a list column via
# `map()`: the `dscr` elements are passed through `c()`, while each element of
# the identifier columns is wrapped in its own `list()`. The hyphenated source
# columns are renamed to `ncbi_geneid` / `ncbi_proteinid` in the process.
# NOTE(review): `map()` returns one element per input row, so a `kid` that
# occurs on several rows presumably still produces several output rows rather
# than a single collapsed row — confirm this is the intended shape.

genes <- genes |>
    group_by(kid) |>
    summarise(
        dscr = map(dscr, c),
        ncbi_geneid = map(`ncbi-geneid`, list),
        ncbi_proteinid = map(`ncbi-proteinid`, list),
        uniprot = map(uniprot, list)
    ) |>
    ungroup()

# Write the gene table as JSON; missing values and NULLs are emitted as
# `null`, and length-one vectors are unboxed to scalars.
jsonlite::write_json(
        x = genes,
        '../../xnet/xgph-db/import/genes.json',
        simplifyVector = TRUE,
        null           = 'null',
        na             = 'null',
        auto_unbox     = TRUE
    )

# pathways

# List all PAO1 pathways.
# The "db:" prefix is stripped from every column except `dscr`: the pattern
# keeps only the text after the last colon, so running it over the free-text
# description would truncate any pathway name that contains a colon.
# `dscr` is instead trimmed at its last " - " separator (presumably the
# trailing organism name — verify against the `kegg_list()` output).

path <- kegg_list(db = 'pathway', org = 'pae') |>
    mutate(across(-dscr, \(x) gsub('^.+:(.+)$', '\\1', x))) |>
    mutate(dscr = gsub('^(.+)\\s-\\s.*$', '\\1', dscr))

# write as a json file

jsonlite::write_json(
    x = path,
    '../../xnet/xgph-db/import/pathways.json',
    simplifyVector = TRUE,
    null           = 'null',
    na             = 'null',
    auto_unbox     = TRUE
)

# Pathway -> gene edges

# Fetch the pathway/gene links for 'pae', drop the "db:" prefix from every
# column, give the columns fixed names and tag each edge with its relation
# type.
edges_path_gene <- kegg_link('pathway', 'pae') |>
    mutate(across(everything(), function(id) gsub('^.+:(.+)$', '\\1', id))) |>
    setNames(nm = c('path_kid', 'gene_kid')) |>
    mutate(rel = 'PATHWAY')

# Dump the edge list as JSON
jsonlite::write_json(
    edges_path_gene,
    path           = '../../xnet/xgph-db/import/edges_path_gene.json',
    auto_unbox     = TRUE,
    na             = 'null',
    null           = 'null',
    simplifyVector = TRUE
)

# Enzymes data

# Fetch all enzymes.
# Only the non-description columns lose their "db:" prefix. The pattern keeps
# the text after the last colon, and enzyme names frequently contain colons
# (e.g. "NADH:ubiquinone reductase"), so applying it to `dscr` would cut the
# names short.

enz <- kegg_list('enzyme') |>
    mutate(across(-dscr, \(x) gsub('^.+:(.+)$', '\\1', x)))

# Fetch all orthologs: strip the "db:" prefix from the identifier and remove
# a trailing bracketed annotation ("... [xyz]") from the description.

ko <- kegg_list('ko') |>
    mutate(
        kid  = gsub('^.+:(.+)$', '\\1', kid),
        dscr = gsub('^(.+)\\s+\\[.*\\]$', '\\1', dscr)
    )

# All three edge tables below are built the same way: fetch the links, strip
# the "db:" prefix from every column, then assign fixed column names with
# `setNames()`.

# create edges between enzymes and orthologs

edges_ko_enz <- kegg_link('enzyme', 'ko') |>
    mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
    setNames(c('enz_kid', 'ko_kid'))

# create edges between orthologs and pae genes

edges_ko_pae <- kegg_link('ko', 'pae') |>
    mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
    setNames(c('ko_kid', 'gene_kid'))

# create edges between pae genes and enzymes

edges_gene_enz <- kegg_link('enzyme', 'pae') |>
    mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
    setNames(c('enz_kid', 'gene_kid'))

# Preview the ortholog/enzyme edges restricted to orthologs and enzymes that
# are linked to pae genes. The result is only printed; `edges_ko_enz` itself
# is left unfiltered.

edges_ko_enz |>
    filter(ko_kid %in% edges_ko_pae$ko_kid, enz_kid %in% edges_gene_enz$enz_kid)

# Enzyme -> pathway edges
#
# Fetch the enzyme/pathway links, strip the "db:" prefix from every column
# and name the columns. Then keep only the rows whose pathway id matches
# 'map', rewrite that id to its 'pae' form and retain the edges whose
# pathway is present in `path`.

edges_enz_path <- kegg_link('enzyme', 'pathway') |>
    mutate(across(everything(), function(id) gsub('^.+:(.+)$', '\\1', id))) |>
    setNames(nm = c('enz_kid', 'path_kid')) |>
    filter(grepl('map.*', path_kid)) |>
    mutate(path_kid = gsub('map(.+)', 'pae\\1', path_kid)) |>
    filter(path_kid %in% path$kid)

# Export everything as JSON.
# Every call uses the same serialisation options: NA and NULL are written as
# `null`, and length-one vectors are unboxed.

# vertex tables

jsonlite::write_json(
    enz,
    path = '../../xnet/xgph-db/import/enz.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

jsonlite::write_json(
    ko,
    path = '../../xnet/xgph-db/import/ko.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

# edge tables

jsonlite::write_json(
    edges_ko_enz,
    path = '../../xnet/xgph-db/import/edges_ko_enz.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

jsonlite::write_json(
    edges_ko_pae,
    path = '../../xnet/xgph-db/import/edges_ko_pae.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

jsonlite::write_json(
    edges_gene_enz,
    path = '../../xnet/xgph-db/import/edges_gene_enz.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

jsonlite::write_json(
    edges_enz_path,
    path = '../../xnet/xgph-db/import/edges_enz_path.json',
    auto_unbox = TRUE, na = 'null', null = 'null', simplifyVector = TRUE
)

# reactions data

# Get reactions.
# The statement must end here: a trailing `|>` would take the next
# assignment as its right-hand side, which is a parse error.

rct <- kegg_list('reaction')


# Compounds data

# Build a linkage graph from the `kegg_info()` output (coerced to a matrix)
# for the 'reaction' database and for the 'pau' organism, then pass each
# graph to `kegg_link_dbs()`.

g1 <- kegg_linkage_graph(as.matrix(kegg_info('reaction')))
g2 <- kegg_linkage_graph(as.matrix(kegg_info('pau')))

res1 <- kegg_link_dbs(g1)
res2 <- kegg_link_dbs(g2)

# Strip the "db:" prefix from every column of `res2`, then collect the unique
# values of each remaining column per `pau` identifier (printed only).
# head(as.data.frame(mapply(gsub, pattern = '^.+:(.+)$', replacement = '\\1', res2))
data.frame(mapply(gsub, res2, MoreArgs = list(pattern = '^.+:(.+)$', replacement = '\\1'))) |>
    group_by(pau) |>
    summarise(across(everything(), \(x) {list(unique(x))}))

# NOTE(review): `g` is not created anywhere in this script (only `g1` and
# `g2` are) — confirm which graph is meant here and in the clustering calls
# further down.
igraph::shortest_paths(g, 'compound', 'genes')

# Convert both igraph objects to visNetwork node/edge tables and render them.
v1 <- toVisNetworkData(g1)
v2 <- toVisNetworkData(g2)

# The hierarchical layout is switched off. `visHierarchicalLayout()` needs the
# network as its first argument, so it must be piped onto the `visNetwork()`
# call rather than run as a statement of its own.
visNetwork(nodes = v1$nodes, edges = v1$edges)
# visNetwork(nodes = v1$nodes, edges = v1$edges) |> visHierarchicalLayout()

visNetwork(nodes = v2$nodes, edges = v2$edges)
# visNetwork(nodes = v2$nodes, edges = v2$edges) |> visHierarchicalLayout()

# Community detection by label propagation.
# NOTE(review): `plot_dendrogram()` presumably requires a hierarchical
# community structure, which label propagation may not provide — confirm.
dend <- cluster_label_prop(g)
plot_dendrogram(dend)
plot(dend, g)

# Endpoint kept for reference; it is not used by the code below
URL <- '/get/pau00010/kgml'
#URL <- '/list/pathway'

# Fetch one pathway in KGML format (alternative id: 'path:pau02025')
x <- .kegg_get(kid = 'path:hsa00010', option = 'kgml')

# Edge table of the relation graph for enzymes (gene products): the relation
# name becomes the edge label, 'Group' relations are drawn dashed, and the
# tooltip combines relation name and value.

links <- dplyr::mutate(
    .kegg_kgml_exp_rct_rel(x),
    label  = byname,
    dashes = byname == 'Group',
    title  = paste(byname, byval),
    smooth = TRUE,
    shadow = TRUE
)

# Node table: label and group are derived from the entry type, the shape is
# chosen per type, and the tooltip shows the KEGG id in bold.

nodes <- dplyr::mutate(
    .kegg_kgml_get_entries(x),
    label = paste(type, id, sep = ': '),
    group = type,
    shape = dplyr::case_when(
        type == 'gene' ~ 'square',
        type == 'compound' ~ 'triangle',
        type == 'map' ~ 'database',
        TRUE ~ 'diamond'
    ),
    title = sprintf("<p><b>%s</b></p>", kid),
    shadow = TRUE
)

# Render the network with mid-edge arrows and interactive editing enabled
visNetwork(nodes = nodes, edges = links) |>
    visEdges(arrows = 'middle', smooth = TRUE) |>
    visOptions(manipulation = TRUE)
# utubun/keggr documentation built on Jan. 29, 2022, 5:08 a.m.