options(htmltools.dir.version = FALSE)
library(knitr)
library(htmlwidgets)
library(magrittr)
# Default options applied to every code chunk of the slides: figure geometry,
# no caching, code echoed, messages and warnings hidden.
# NOTE(review): `hiline` does not appear among knitr's documented chunk
# options; presumably it is consumed by a custom hook or by the slide
# framework -- confirm.
knitr::opts_chunk$set(
  fig.width = 9,
  fig.height = 3.5,
  fig.retina = 3,
  out.width = "100%",
  cache = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  hiline = TRUE
)
# Save an htmlwidget as an HTML page and return the <iframe> markup that
# embeds it.
#
# p:     the widget, handed as is to htmlwidgets::saveWidget().
# style: text inserted verbatim in the style attribute of the iframe.
#
# The page is written in "<input>_files/htmlwidgets_plots", where <input> is
# the knitted file name with its ".Rmd" extension replaced, and is named after
# the label of the chunk being evaluated. It is not self-contained: its
# dependencies go to the "lib" directory given to saveWidget().
#
# Returns a character string: '<iframe src="..." style="..."></iframe>'.
hwIframe <- function(p, style=""){
   input_file <- knitr:::knit_concord$get("infile")
   plot_dir <- file.path(
      sub("[.]Rmd$", "_files", input_file),
      "htmlwidgets_plots"
   )
   dir.create(plot_dir, recursive=TRUE, showWarnings=FALSE)
   html_name <- paste0(opts_current$get("label"), ".html")
   # The widget is saved through the normalized directory path, whereas the
   # iframe below points to the path as built above.
   htmlwidgets::saveWidget(
      p,
      file=file.path(normalizePath(plot_dir), html_name),
      selfcontained=FALSE,
      libdir="lib"
   )
   sprintf(
      '<iframe src="%s" style="%s"></iframe>',
      file.path(plot_dir, html_name),
      style
   )
}
# Presenter notes read from "notes.md": the file is parsed as XML and each
# child of its "slides" element is flattened to a single character string.
# Elements whose text ends up empty are discarded.
notes <- local({
   slides <- xml2::as_list(xml2::read_xml("notes.md"))[["slides"]]
   texts <- lapply(slides, function(slide){
      chunks <- unlist(slide)
      # Drop the strings equal to "[ comment ]"
      chunks <- chunks[which(chunks != "[ comment ]")]
      joined <- paste(chunks, collapse="\n")
      # Strip one leading and one trailing newline
      joined <- stringr::str_remove(joined, "^\n")
      stringr::str_remove(joined, "\n$")
   })
   is_filled <- unlist(lapply(texts, function(txt) txt != ""))
   texts[which(is_filled)]
})

class: title-slide, inverse, left, middle
background-image: url("UCB-theme/UCB-Cover.jpg")
background-size: cover

Managing and leveraging knowledge catalogs with TKCat

Patrice Godard | useR!2022 | 23 Jun 2022

???

cat(notes$title)
library(xaringanthemer)
# Slide theme built from two colours: #001489 as the base colour and #4B4F54
# as the colour used for "black".
style_mono_accent(
  base_color="#001489", black_color="#4B4F54"
)

layout: true
background-image: url("UCB-theme/UCB-logo-foot.png"), url("UCB-theme/UCB-top-left.png"), url("UCB-theme/UCB-bottom-right-grey.png"), url("media/user2022-logo.png")
background-position: 3% 97%, 3% 3%, 98% 90%, 98% 3%
background-size: 15%, 6%, 6%, 6%


Translational Bioinformatics at UCB

???

cat(notes$ucb_tbn)

--

Understanding disease at the cellular and molecular level to identify relevant therapeutic approaches for patient populations

--

Understanding disease at the cellular and molecular level to identify relevant therapeutic approaches for patient populations

--

Understanding disease at the cellular and molecular level to identify relevant therapeutic approaches for patient populations


From data to wisdom

Relationships between data (empty dots), information (colored dots), knowledge (connected colored dots), insight (dots of interest) and wisdom (path between dots of interest)

Credit: from Twitter (unknown author)

???

cat(notes$knowledge)

--

Adding conspiracy theory to the figure (dots connected for drawing a unicorn)

--

Rectangle highlighting the knowledge related drawing


Expected features of the knowledge to manage

???

cat(notes$knowledge_features)

--

Logo of the ReDaMoR R package

--

Logo of a folder Logo of a csv file Logo of json file

--

BED dictionary icon DODO dictionary icon

BED: a Biological Entity Dictionary | DODO: Dictionary Of Disease Ontologies | https://clickhouse.com/

--

Logo of the ClickHouse DBMS

BED: a Biological Entity Dictionary | DODO: Dictionary Of Disease Ontologies | https://clickhouse.com/


MDB: a Modeled Database for each knowledge resource

https://github.com/patzaw/ReDaMoR | https://github.com/patzaw/TKCat

MDB icon

Features

???

cat(notes$mdb_features)

--

MDB icon

--

Logo of the ReDaMoR R package

MDB icon

--

MDB icon

--

MDB icon

Two MDBs with collections

--

MDB icon

White rectangle

--

TKCat

Logo of the TKCat R package


Drafting a data model in R with ReDaMoR

HPO: Köhler et al. (2019) | ReDaMoR user guide > Drafting a data model

library(readr)
# Directory of the "HPO-subset" example files installed with the ReDaMoR package
hpo_data_dir <- system.file("examples/HPO-subset", package="ReDaMoR")
# Read the three tables of the example with readr::read_tsv().
# NOTE(review): the double braces are presumably line-highlighting markers for
# the slides; R still evaluates the enclosed assignments as usual.
{{HPO_hp <- read_tsv(file.path(hpo_data_dir, "HPO_hp.txt"))}}
{{HPO_diseases <- read_tsv(file.path(hpo_data_dir, "HPO_diseases.txt"))}}
{{HPO_diseaseHP <- read_tsv(file.path(hpo_data_dir, "HPO_diseaseHP.txt"))}}

???

cat(notes$draft)

--

library(ReDaMoR)
{{hpo_model <- df_to_model(HPO_hp, HPO_diseases, HPO_diseaseHP)}}
plot(hpo_model)
library(ReDaMoR)
# Draft the model from the three tables, then set the display coordinates of
# every table: y = 0 for all of them, x = 0 / 200 / 400.
hpo_model <- df_to_model(HPO_hp, HPO_diseases, HPO_diseaseHP)
hpo_model <- RelDataModel(lapply(hpo_model, function(tbl){
   tbl$display$y <- 0
   tbl
}))
hpo_model$HPO_hp$display$x <- 0
hpo_model$HPO_diseases$display$x <- 200
hpo_model$HPO_diseaseHP$display$x <- 400
# Plot in a 150px-high widget and embed it in a 200px-high iframe
pp <- visOptions(plot(hpo_model), width="100%", height="150px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="1000px", browser.defaultHeight="150px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:200px; width:100%; border-style:none; background-color:transparent;"
))

Creating a data model in R with ReDaMoR

Try the GUI in shinyapps.io | ReDaMoR user guide > Creating a data model

hpo_model <- model_relational_data(hpo_model)

The model_relational_data() graphical user interface

The model_relational_data() graphical user interface

# Read the model saved in "hpo_model.json" and keep only its three HPO tables
hpo_model <- read_json_data_model("hpo_model.json")[
   c("HPO_hp", "HPO_diseases", "HPO_diseaseHP")
]
# Plot with the HPO_hp node selected; the id-selection control gets the
# "visibility:hidden;" style.
pp <- visOptions(
   plot(hpo_model),
   width="100%", height="315px",
   nodesIdSelection=list(
      enabled=TRUE, selected="HPO_hp", useLabels=FALSE,
      style="visibility:hidden;"
   )
)
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="530px", browser.defaultHeight="315px",
   browser.padding=0, browser.fill=FALSE
)
# The iframe is absolutely positioned on the slide
cat(hwIframe(
   pp,
   "position:absolute; top:250px; left:80px; height:355px; width:550px; border-style:none; background-color:transparent; z-index:2;"
))

???

cat(notes$model)

Confronting data with the model

ReDaMoR user guide > Confronting data

.pull-left[ wzxhzdk:14

HPO model limited to 3 tables

] .pull-right[
wzxhzdk:15
] .pull-left[ wzxhzdk:16 ] .pull-right[
wzxhzdk:17
]

???

cat(notes$confront)

class: comp-title

Creating and using an MDB with TKCat

TKCat user guide > Create an MDB | TKCat user guide > Leveraging MDB

.pull-left[

MDB creation

library(TKCat)
hpo <- `memoMDB`(
   `dataTables`=list(
      "HPO_hp"=HPO_hp,
      "HPO_diseases"=HPO_diseases,
      "HPO_diseaseHP"=HPO_diseaseHP
   ),
   `dataModel`=hpo_model,
   `dbInfo`=list(
      name="miniHPO",
      title="Very small extract of the human phenotype ontology",
      description="For demonstrating ReDaMoR and TKCat capabilities...",
      url="https://hpo.jax.org/app/",
      version="0.1",
      maintainer="Patrice Godard <patrice.godard@gmail.com>"
   )
)
library(TKCat)
# Build the "miniHPO" MDB with memoMDB() from the three tables, their data
# model and the description of the database.
hpo <- memoMDB(
   dataTables=list(
      HPO_hp=HPO_hp, HPO_diseases=HPO_diseases, HPO_diseaseHP=HPO_diseaseHP
   ),
   dataModel=hpo_model,
   dbInfo=list(
      name="miniHPO",
      title="Very small extract of the human phenotype ontology",
      description="For demonstrating ReDaMoR and TKCat capabilities...",
      url="https://hpo.jax.org/app/",
      version="0.1",
      maintainer="Patrice Godard <patrice.godard@gmail.com>"
   )
)

]

???

cat(notes$mdb)

--

.pull-right[

Explore and retrieve information

db_info(hpo)
data_model(hpo)
hpo %>% select(HPO_diseases, HPO_diseaseHP)
hpo %>% pull(HPO_diseases) %>% 
   head(3)
hpo %>% pull(HPO_diseases) %>%
   head(3)

]


class: code-marg-5

Leverage the MDB data model: filter

TKCat user guide > Filtering and joining

.pull-left[

dims(hpo) %>% select(name, nrow)

]

.pull-right[

data_model(hpo) %>% plot()
# Plot the data model of hpo in a 100px-high widget and embed it in a
# 120px-high iframe.
pp <- visOptions(plot(data_model(hpo)), width="100%", height="100px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="500px", browser.defaultHeight="100px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:120px; width:100%; border-style:none; background-color:transparent;"
))

]

_

fhpo <- hpo %>% `filter(HPO_hp=stringr::str_detect(description, "eye"))`
fhpo <- hpo %>% filter(HPO_hp=stringr::str_detect(description, "eye"))

???

cat(notes$filter)

--

.pull-left[

fhpo %>% dims() %>% select(name, nrow)

]

.pull-right[

data_model(fhpo) %>% plot()
# Plot the data model of the filtered MDB (fhpo) in a 100px-high widget and
# embed it in a 120px-high iframe.
pp <- visOptions(plot(data_model(fhpo)), width="100%", height="100px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="500px", browser.defaultHeight="100px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:120px; width:100%; border-style:none; background-color:transparent;"
))

]


class: code-marg-5

Leverage the MDB data model: join

TKCat user guide > Filtering and joining

.pull-left[

dims(fhpo) %>% select(name, nrow)

]

.pull-right[

data_model(fhpo) %>% plot()
# Data model of fhpo, shown before the join: 100px-high widget embedded in a
# 120px-high iframe.
pp <- visOptions(plot(data_model(fhpo)), width="100%", height="100px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="500px", browser.defaultHeight="100px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:120px; width:100%; border-style:none; background-color:transparent;"
))

]

_

jhpo <- fhpo %>% `join_mdb_tables(c("HPO_hp", "HPO_diseaseHP", "HPO_diseases"))`
jhpo <- fhpo %>% join_mdb_tables(c("HPO_hp", "HPO_diseaseHP", "HPO_diseases"))

???

cat(notes$join)

--

.pull-left[

jhpo %>% dims() %>% select(name, nrow)

]

.pull-right[

data_model(jhpo) %>% plot()
# Plot the data model of the joined MDB (jhpo) in a 100px-high widget and
# embed it in a 120px-high iframe.
pp <- visOptions(plot(data_model(jhpo)), width="100%", height="100px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="500px", browser.defaultHeight="100px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:120px; width:100%; border-style:none; background-color:transparent;"
))

]


MDB implementations

TKCat user guide > MDB in files | TKCat user guide > MDB implementations

a

Double arrow Double arrow Double arrow

.pull-left[

#### In memory (`as_memoMDB()`)

- All the data loaded in R memory
- Fast but greedy
- Convenient for using whole tables

]

.pull-left[

#### In files (`as_fileMDB()`)

- Data in files until requested (`pull()`, `filter()`, ...)
- Not convenient for subsetting (slow)
- Convenient for archiving and sharing

]

.pull-left[

#### In ClickHouse DBMS (`as_chMDB()`)

- Data in DBMS until requested (`pull()`, `filter()`, ...)
- Efficient to get subsets (`get_query()`)
- Convenient for sharing and managing access
- Versioning

]

???

cat(notes$implementations)

TKCat: a data warehouse management system

TKCat user guide > chTKCat | TKCat user guide > chTKCat operations

.left-column[

# Connection to a chTKCat instance on localhost, as user "default" with an
# empty password.
k <- chTKCat(
   host="localhost",
   user="default",
   password=""
)
# Open the graphical user interface for exploring the MDBs available through k
explore_MDBs(k)

]

.right-column[

explore_MDBs(k) graphical user interface: available knowledge resources

]

???

cat(notes$tkcat)

--

explore_MDBs(k) graphical user interface: explore the model of a specific resource

class: code-marg-5

Merging MDBs with collections

ClinVar: Landrum et al. (2018) | TKCat user guide > Merging with collections

# Read the ClinVar example MDB from the files installed with TKCat
clinvar <- read_fileMDB(system.file("examples/ClinVar", package="TKCat"))
# Register the HPO_diseases table as a member of the "Condition" collection.
# NOTE(review): presumably static=TRUE marks a constant value and static=FALSE
# a column name of the table -- confirm in the TKCat documentation.
hpo <- add_collection_member(
   hpo,
   collection="Condition",
   table="HPO_diseases",
   condition=list(value="Disease", static=TRUE),
   source=list(value="db", static=FALSE),
   identifier=list(value="id", static=FALSE)
)
shared_collections <- get_shared_collections(hpo, clinvar)
# Merge the two MDBs using the first row of the shared collections
hpo_clinvar <- merge(
   hpo, clinvar,
   by=shared_collections[1,], dmAutoLayout=FALSE
)
# Adjust the display coordinates of the merged data model before plotting it.
tp <- data_model(hpo_clinvar)
# Give the association table the same x as ClinVar_traitCref
tp$Condition_1_HPO_diseases_1_ClinVar_traitCref$display$x <-
   tp$ClinVar_traitCref$display$x
# Edit the tables on the unclassed object, then rebuild the RelDataModel
tp <- unclass(tp)
for(n in c(
   "HPO_diseases", "HPO_diseaseHP", "HPO_hp",
   "Condition_1_HPO_diseases_1_ClinVar_traitCref"
)){
   tp[[n]]$display$y <- -250
}
tp <- RelDataModel(tp)
# 460px-high widget embedded in a 470px-high iframe
pp <- visOptions(plot(tp), width="100%", height="460px")
pp$sizingPolicy <- htmlwidgets::sizingPolicy(
   browser.defaultWidth="1000px", browser.defaultHeight="460px",
   browser.padding=0, browser.fill=FALSE
)
cat(hwIframe(
   pp,
   "height:470px; width:95%; border-style:none; background-color:transparent;"
))

???

cat(notes$merging)

Supported data types

???

cat(notes$types)

--

.pull-left[

- base64 (file)

]

.pull-right[

Example of files stored in base64 character

]

--

.pull-left[

- matrix and sparse matrix

]

.pull-right[

Example of matrix

]

class: comp-title

Acknowledgements

.pull-left[

### Supporting tools

- [tidyverse](https://www.tidyverse.org/) and related packages
- [visNetwork](https://datastorm-open.github.io/visNetwork/)
- [shiny](https://shiny.rstudio.com/) and related packages
- [ClickHouse](https://clickhouse.com/) and [RClickhouse](https://github.com/IMSMWU/RClickhouse)
- [Matrix](https://cran.r-project.org/package=Matrix)
- **Many others**:
   - [ReDaMoR dependencies](https://github.com/patzaw/ReDaMoR#dependencies)
   - [TKCat dependencies](https://github.com/patzaw/TKCat#dependencies)
   - [CRAN](https://cran.r-project.org/)

]

???

wzxhzdk:50

--

.pull-right[

### UCB team

#### Managers and Developers

- Jonathan van Eyll
- Liesbeth François
- Yuliya Nigmatullina

#### Users and testers

- Aurélie Bousard
- Olga Giannakopoulou
- Ioana Cutcutache
- Bram Van de Sande
- Waqar Ali
- John Santa Maria

]

layout: false
count: false
background-image: url("UCB-theme/UCB-logo-foot.png"), url("media/user2022-logo.png")
background-position: 3% 97%, 98% 3%
background-size: 15%, 6%

Slides created with xaringan and xaringanthemer

???

cat(notes$closing)


patzaw/TKCat documentation built on June 12, 2025, 11:04 a.m.