library(httr)
library(xml2)
library(RSQLite)
library(visNetwork)
library(igraph)
# Source every script found in ./R. The kegg_* helpers called below are
# presumably defined there (NOTE(review): confirm).
# The pattern is anchored with `$` so that only names ending in `.R` are
# sourced; an unanchored '.*\\.R' also matches names such as `.Rhistory`,
# `.RData` or `notes.Rmd` (hidden files are listed because all.files = TRUE).
invisible(lapply(
  list.files(
    path = './R',
    pattern = '\\.R$',
    all.files = TRUE,
    full.names = TRUE
  ),
  source
))
# list the organisms in KEGG and print the Pseudomonas aeruginosa rows
org <- .kegg_list(db = 'organism')
org[grepl('Pseudomonas aeruginosa.*', org$orgname), ]
# convert between po1 and pa14
# NOTE(review): the two calls pass the organism code and 'ncbi-geneid' in
# opposite order -- confirm that this is intended.
pae <- kegg_conv('pae', 'ncbi-geneid')
pau <- kegg_conv('ncbi-geneid', 'pau')
# print the number of rows returned by each conversion
nrow(pae)
nrow(pau)
# List all genes for the KEGG organism code 'pae'
genes <- kegg_list(db = 'pae')
# Convert between KEGG, Uniprot, NCBI identifiers.
# Each conversion table has its `pae` column renamed to `kid` and is then
# joined to the gene list on `kid`.
ncbi_geneid <- kegg_conv('pae', 'ncbi-geneid') |>
  rename(kid = pae) |>
  inner_join(genes, by = 'kid')
ncbi_protid <- kegg_conv('pae', 'ncbi-proteinid') |>
  rename(kid = pae) |>
  inner_join(genes, by = 'kid')
uniprot <- kegg_conv('pae', 'uniprot') |>
  rename(kid = pae) |>
  inner_join(genes, by = 'kid')
# bring all info for the genes together
# `dscr` is dropped from each lookup table before joining so that it is not
# duplicated (it is already present in `genes`).
genes <- genes |>
  inner_join(select(ncbi_geneid, -dscr), by = 'kid') |>
  # a stray extra positional `ncbi_protid` argument was removed here; it was
  # being bound to inner_join()'s `copy` parameter
  inner_join(select(ncbi_protid, -dscr), by = 'kid') |>
  inner_join(select(uniprot, -dscr), by = 'kid') |>
  # strip the '<prefix>:' part (everything up to the last ':') from every column
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)}))
# Clean and save as json
# Group by `kid` and wrap the identifier columns into list columns.
# NOTE(review): map() returns one element per input row, so a `kid` that has
# several rows yields several output rows rather than one collapsed row --
# confirm this is the intended shape.
genes <- summarise(
  group_by(genes, kid),
  dscr = map(dscr, c),
  ncbi_geneid = map(`ncbi-geneid`, list),
  ncbi_proteinid = map(`ncbi-proteinid`, list),
  uniprot = map(uniprot, list)
)
genes <- ungroup(genes)
# Write the gene table to the import directory; NULL and NA are both
# serialised as JSON null and length-one vectors are unboxed.
jsonlite::write_json(
  genes,
  path = '../../xnet/xgph-db/import/genes.json',
  auto_unbox = TRUE,
  na = 'null',
  null = 'null',
  simplifyVector = TRUE
)
# pathways
# list all pathways for the KEGG organism code 'pae'
path <- kegg_list(db = 'pathway', org = 'pae')
# strip the '<prefix>:' part (everything up to the last ':') from every column
path <- mutate(
  path,
  across(everything(), function(col) gsub('^.+:(.+)$', '\\1', col))
)
# keep only the part of the description before the last ' - '
path <- mutate(path, dscr = gsub('^(.+)\\s-\\s.*$', '\\1', dscr))
# write as a json file (NULL and NA both become JSON null)
jsonlite::write_json(
  path,
  path = '../../xnet/xgph-db/import/pathways.json',
  auto_unbox = TRUE,
  na = 'null',
  null = 'null',
  simplifyVector = TRUE
)
# link pathways to the genes
edges_path_gene <- kegg_link('pathway', 'pae')
# strip the '<prefix>:' part (everything up to the last ':') from every column
edges_path_gene <- mutate(
  edges_path_gene,
  across(everything(), function(col) gsub('^.+:(.+)$', '\\1', col))
)
# name the two columns and tag every edge with its relation type
edges_path_gene <- setNames(edges_path_gene, c('path_kid', 'gene_kid'))
edges_path_gene <- mutate(edges_path_gene, rel = 'PATHWAY')
# write as a json file (NULL and NA both become JSON null)
jsonlite::write_json(
  edges_path_gene,
  path = '../../xnet/xgph-db/import/edges_path_gene.json',
  auto_unbox = TRUE,
  na = 'null',
  null = 'null',
  simplifyVector = TRUE
)
# Enzymes data
# fetch all enzymes and strip the '<prefix>:' part from every column
enz <- kegg_list('enzyme') |>
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)}))
# fetch all orthologs; strip the prefix from the id and the trailing
# ' [...]' bracket from the description
ko <- kegg_list('ko') |>
  mutate(
    kid = gsub('^.+:(.+)$', '\\1', kid),
    dscr = gsub('^(.+)\\s+\\[.*\\]$', '\\1', dscr)
  )
# create edges between enzymes and orthologs
edges_ko_enz <- kegg_link('enzyme', 'ko') |>
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
  setNames(c('enz_kid', 'ko_kid'))
# create edges between orthologs and pae genes
# (base setNames() is used here like in every other edge table, instead of
# set_names(), which needs purrr/rlang to be attached)
edges_ko_pae <- kegg_link('ko', 'pae') |>
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
  setNames(c('ko_kid', 'gene_kid'))
# create edges between pae genes and enzymes
edges_gene_enz <- kegg_link('enzyme', 'pae') |>
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
  setNames(c('enz_kid', 'gene_kid'))
# filter edges between orth and enz by pae
# NOTE(review): the filtered table is only printed, not assigned, so
# `edges_ko_enz` itself stays unfiltered -- confirm whether it should be
# reassigned here.
edges_ko_enz |>
  filter(ko_kid %in% edges_ko_pae$ko_kid, enz_kid %in% edges_gene_enz$enz_kid)
# create edges between enz and pathways
edges_enz_path <- kegg_link('enzyme', 'pathway') |>
  mutate(across(everything(), \(x) {gsub('^.+:(.+)$', '\\1', x)})) |>
  setNames(c('enz_kid', 'path_kid'))
# keep only 'map...' pathway ids, rewrite their 'map' prefix to 'pae', and
# keep the rows whose rewritten id is present in path$kid
edges_enz_path <- edges_enz_path |>
  filter(grepl('map.*', path_kid)) |>
  mutate(path_kid = gsub('map(.+)', 'pae\\1', path_kid)) |>
  filter(path_kid %in% path$kid)
# export data
# Write each table to '<import dir>/<name>.json'. One loop replaces six
# copy-pasted write_json() calls so the serialisation options cannot drift
# apart; the file names and options are the same as before.
# local() keeps the loop variables out of the global environment.
invisible(local({
  import_dir <- '../../xnet/xgph-db/import'
  tables <- list(
    # vertices
    enz = enz,
    ko = ko,
    # edges
    edges_ko_enz = edges_ko_enz,
    edges_ko_pae = edges_ko_pae,
    edges_gene_enz = edges_gene_enz,
    edges_enz_path = edges_enz_path
  )
  for (table_name in names(tables)) {
    jsonlite::write_json(
      x = tables[[table_name]],
      path = file.path(import_dir, paste0(table_name, '.json')),
      simplifyVector = TRUE,
      null = 'null',
      na = 'null',
      auto_unbox = TRUE
    )
  }
}))
# reactions data
# get reactions
# (no trailing pipe here: a `|>` at the end of this line would pipe into the
# following assignment, which is a parse error)
rct <- kegg_list('reaction')
# Compounds data
# build linkage graphs from the KEGG info of 'reaction' and of 'pau'
g1 <- kegg_linkage_graph(as.matrix(kegg_info('reaction')))
g2 <- kegg_linkage_graph(as.matrix(kegg_info('pau')))
res1 <- kegg_link_dbs(g1)
res2 <- kegg_link_dbs(g2)
# head(as.data.frame(mapply(gsub, pattern = '^.+:(.+)$', replacement = '\\1', res2))
# strip the '<prefix>:' part from every column of res2, then collect the
# unique values of each column per `pau` (result is printed, not stored)
data.frame(mapply(gsub, res2, MoreArgs = list(pattern = '^.+:(.+)$', replacement = '\\1'))) |>
  group_by(pau) |>
  summarise(across(everything(), \(x) {list(unique(x))}))
# NOTE(review): `g` is not defined in the visible script (only g1 and g2
# are) -- confirm which graph is meant here and in the clustering below.
igraph::shortest_paths(g, 'compound', 'genes')
v1 <- toVisNetworkData(g1)
v2 <- toVisNetworkData(g2)
# The hierarchical layout step is disabled together with its pipe; a bare
# visHierarchicalLayout() call on its own line has no graph argument.
visNetwork(nodes = v1$nodes, edges = v1$edges) # |> visHierarchicalLayout()
visNetwork(nodes = v2$nodes, edges = v2$edges) # |> visHierarchicalLayout()
dend <- cluster_label_prop(g)
plot_dendrogram(dend)
plot(dend, g)
# KGML example: fetch one pathway as KGML and draw it with visNetwork
URL <- '/get/pau00010/kgml'
#URL = '/list/pathway'
# 'path:pau02025'
x <- .kegg_get(kid = 'path:hsa00010', option = 'kgml')
# build simple relation graph for enzymes (gene products)
# edges: add the visNetwork display columns (label, dashes, title, smooth,
# shadow); edges whose `byname` is 'Group' are drawn dashed
links <- dplyr::mutate(
  .kegg_kgml_exp_rct_rel(x),
  label = byname,
  dashes = (byname == 'Group'),
  title = paste(byname, byval),
  smooth = TRUE,
  shadow = TRUE
)
# nodes: label by type and id, group by type, and pick a shape per type
# (anything that is not gene/compound/map becomes a diamond)
nodes <- dplyr::mutate(
  .kegg_kgml_get_entries(x),
  label = paste(type, id, sep = ': '),
  group = type,
  shape = dplyr::case_when(
    type == 'gene' ~ 'square',
    type == 'compound' ~ 'triangle',
    type == 'map' ~ 'database',
    TRUE ~ 'diamond'
  ),
  title = sprintf("<p><b>%s</b></p>", kid),
  shadow = TRUE
)
# render the network with mid-edge arrows and the manipulation option on
visOptions(
  visEdges(visNetwork(nodes, links), arrows = 'middle', smooth = TRUE),
  manipulation = TRUE
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.