##
## renderWebPage.R: Present data as webpage(s)
##
## Usage: call the main() from command line:
## R -e 'source("R/renderWebPage.R"); main()'
library(assertthat)
library(dplyr)
library(glue)
library(readr)
library(rvest)
library(stringr)
library(whisker)
##
## Settings of global variables
##
WEB_TEMPLATE <- "web/template.html"
OUTPUT <- "OUTPUT"
WEB_OUT <- glue::glue("{OUTPUT}/index.target.with.data.html")
WEB_OUT_NODATA <- glue::glue("{OUTPUT}/index.target.no_data.html")
TARGET.INPUT <- "data/target_list.tsv"
TARGET_LIST.RDS <- glue::glue("{OUTPUT}/targetList.rds")
# CLUE.INPUT <- glue::glue("{OUTPUT}/clue.tsv")
PATCHED.CLUE.INPUT <- glue::glue("{OUTPUT}/clue_patched.rds")
CHEMBL.URL.TEMPLATE <- "https://www.ebi.ac.uk/chembl/target_report_card"
MOLECULAR_CSV.OUTPUT <- glue::glue("{OUTPUT}/molecular_background.csv")
COMPOUNDS_CSV.OUTPUT <- glue::glue("{OUTPUT}/compounds_summary.csv")
#' Main function
#'
#' Load data and represent it in HTML format.
#'
#' @return
main <- function() {
## read curated input names
targetList <- readRDS(TARGET_LIST.RDS)
## read input data prepared by dataPatch.R
patchedData <- readRDS(PATCHED.CLUE.INPUT)
patchedData <- targetList %>%
left_join(patchedData) %>%
rowwise() %>%
mutate(has_data = (!is.na(pert_iname) && !is.na(UNIPROT_KB_ID)))
renderWebPage(
patchedData, title = "Has CLUE.IO Entries", outputHTML = WEB_OUT
)
warnings()
}
#' Render web page
#'
#' @param result List of data structures to be visualized as a web page.
#' @param title Extension title.
#' @param outputHTML Name of result HTML file.
#'
#' @return Invisible NULL.
renderWebPage <- function(result, title, outputHTML = NULL) {
resultHasNoData <- result %>%
filter(has_data == FALSE) %>%
arrange(Label, HUGO)
result <- result %>%
filter(has_data == TRUE) %>%
arrange(Label, HUGO)
## create summary tables ------------------------------------------------
molecularBackground <- renderMolecularBackgroundSummary(result)
compoundsSummary <- renderCompoundsSummary(result)
## create overview list ------------------------------------------------
overview <- list(
targetsWithNoClueData = resultHasNoData %>% pull(HUGO) %>% unique(),
targetsWithClueData = result %>% pull(HUGO) %>% unique(),
totalCompoundCount = result %>%
select(pert_iname) %>% distinct() %>% nrow()
)
overview$targetsWithNoClueDataCount <- length(overview$targetsWithNoClueData)
overview$targetsWithClueDataCount <- length(overview$targetsWithClueData)
overview$targetsWithNoClueData <- paste(overview$targetsWithNoClueData, collapse = ', ')
overview$targetsWithClueData <- paste(overview$targetsWithClueData, collapse = ', ')
overview$avgCompoundsPerTarget <- round(
overview$totalCompoundCount / overview$targetsWithClueDataCount, digits = 2
)
countsByFinalStatus <- overviewByFinalStatus(result)
overview <- c(overview, countsByFinalStatus)
# result %>% select(pert_iname) %>% distinct()
# result %>% select(pubchem_cid) %>% distinct()
# result %>% select(chembl_id) %>% distinct()
# result %>% select(drugbank_id) %>% distinct()
## prepare summary tables for access from template ----------------------
molecularBackground <- whisker::rowSplit(molecularBackground)
compoundsSummary <- whisker::rowSplit(compoundsSummary)
## transforming data for rendering ----
message("transforming data for rendering...")
## - this should be an iteration on each HUGO group
## - collect pert groups for each gene group
## Maybe I shouldn't join result tables in download function.
collection <- list()
for (geneGroup in unique(result$HUGO)) {
message(glue::glue("processing gene group: {geneGroup}..."))
groupName <- geneGroup
# overwrite variable with data subset
geneGroup <- result %>%
filter(HUGO == groupName)
hasData <- geneGroup$has_data[1]
label <- geneGroup$Label[1]
UNIPROT_KB_ID <- geneGroup$UNIPROT_KB_ID[1]
# TODO: Old solution was: geneGroup$protein_external_id[1]
uniProtData <- result %>%
select(HUGO, UniProtData) %>%
filter(HUGO == groupName) %>%
distinct() %>%
pull(UniProtData)
## unwrap UniProt details
stringID <- uniProtData[[UNIPROT_KB_ID]]$STRING
keggID <- uniProtData[[UNIPROT_KB_ID]]$KEGG
uniProtSubCellular <- uniProtSubCellular(uniProtData[[UNIPROT_KB_ID]])
uniProtMolecular <- uniProtMolecularFunction(uniProtData[[UNIPROT_KB_ID]])
uniProtBiological <- uniProtBiologicalProcess(uniProtData[[UNIPROT_KB_ID]])
## create an iterable list of list of "pathwayID" and "pathwayName" pairs
reactomePathways <- uniProtData[[UNIPROT_KB_ID]]$Reactome %>%
whisker::iteratelist(name = "pathwayID", value = "pathwayName")
## group by pert_iname
# drugBankId <- geneGroup$drugbank_id[1]
# chemblId <- geneGroup$chembl_id[1]
# finalStatus <- geneGroup$final_status[1]
grouppedByPerts <- geneGroup %>%
# select(-c(HUGO, protein_external_id, has_data)) %>%
select(-c(HUGO, has_data)) %>%
group_by(pert_iname) %>%
group_split()
## collect pert groups per genes and creates
grouppedByPerts <- multivaluedCellsToHTML(grouppedByPerts)
collection <- c(collection, list(list(
target = groupName,
stringID = stringID,
keggID = keggID,
data = grouppedByPerts,
groupLabel = label,
UNIPROT_KB_ID = UNIPROT_KB_ID,
hasData = tolower(hasData),
uniProtSubCell = uniProtSubCellular,
uniProtMolecular = uniProtMolecular,
uniProtBiological = uniProtBiological,
reactomePathways = reactomePathways
)))
} # end of main for loop
## export as web page ---------------------------------------------------
message(glue("reading web template: {WEB_TEMPLATE}"))
template <- readr::read_file(WEB_TEMPLATE)
creationTime <- format(Sys.time(), usetz = TRUE)
message(glue("rendering web page, template is '{WEB_TEMPLATE}'"))
renderResult <- whisker::whisker.render(template, debug = TRUE, data = list(
targets = collection,
overview = overview,
molecularBackground = molecularBackground,
compoundsSummary = compoundsSummary,
title = title,
creationTime = creationTime
))
readr::write_file(renderResult, file = outputHTML)
message(glue("rendered web page is saved into '{outputHTML}'"))
invisible(NULL)
}
#' Prepare multiple values of table cell
#'
#' @param dataList List of dataframes
#'
#' @return Pre-processed dataframes to be ready to show multiple values in
#' sepecific HTML table cells.
#' @import dplyr
multivaluedCellsToHTML <- function(dataList) {
assertthat::assert_that(is.list(dataList))
assertthat::assert_that(length(dataList) > 0)
assertthat::assert_that(is.data.frame(dataList[[1]]))
cellsToHTML <- function(dataframe) {
# TODO: Does it eliminate duplications?
## collapsing moa
moa <- paste(unique(dataframe$moa), collapse = ", <br/><br/>")
dataframe <- dataframe %>%
# select(-c(moa, pubchem_cid, chembl_id)) %>%
select(
pert_iname,
status_source,
drugbank_id,
pubchem_cid,
chembl_id,
final_status
) %>%
distinct() %>%
mutate(
chembl_id = chemblHTML(chembl_id),
pubchem_cid = pubChemHTML(pert_iname, pubchem_cid),
drugbank_id = drugBankHTML(drugbank_id)
) %>%
rowwise() %>%
mutate(chem_drugs = paste0(
na.exclude(c(chembl_id, pubchem_cid, drugbank_id)),
collapse = "<br/>")
) %>%
select(-c(chembl_id, pubchem_cid, drugbank_id)) %>%
distinct()
## strict verification
browser(expr = (nrow(dataframe) != 1))
assertthat::assert_that(nrow(dataframe) == 1)
## set collapsed moa
dataframe$moa <- moa
return(dataframe)
}
dataList <- lapply(dataList, cellsToHTML)
return(dataList)
}
#' ChEMBL Id to HTML link
#'
#' @param chemblId
#'
#' @return Character object containing a HTML code fragment with
#' an "anchor" element.
chemblHTML <- function(chemblId) {
## used in case of non-numeric ChEMBL Id
unexpectedChemblId <- function(chemblId) {
return(
if_else(chemblId != "",
glue::glue("Unexpected ChEMBL value: '{chemblId}'"),
""
)
)
}
chemblId <- unique(chemblId)
## choose appropriate one
HTMLtext <- dplyr::if_else(is.na(chemblId),
## TRUE branch
glue::as_glue(NA),
## FALSE branch
dplyr::if_else(stringr::str_ends(chemblId, pattern = "^CHEMBL[0-9]+$"),
## TRUE branch
{
link <- glue::glue("{CHEMBL.URL.TEMPLATE}/{chemblId}")
aHref(link = link, titleText = chemblId)
},
unexpectedChemblId(chemblId)
) # inner if_else
) # outer if_else
return(if_else(is.na(HTMLtext),
HTMLtext,
paste(HTMLtext, collapse = "<br/>")
))
}
#' Create DrugBank links
#'
#' @param drugBankId DrugBank ID(s)
#'
#' @return HTML "a" snippet(s) that can be used in HTML document directly.
drugBankHTML <- function(drugBankId) {
drugBankId <- unique(drugBankId)
HTMLtext <- if_else(stringr::str_ends(drugBankId, pattern = "^DB[0-9]+$"), {
link <- glue::glue("http://www.drugbank.ca/drugs/{drugBankId}")
aHref(link = link, titleText = drugBankId)
},
if_else(is.na(drugBankId),
glue::as_glue(NA),
## text as is
glue::as_glue(drugBankId)
)
)
return(if_else(is.na(HTMLtext),
HTMLtext,
paste(HTMLtext, collapse = "<br/>")
))
}
#' Create PubChem links
#'
#' @param pert_iname compound name
#' @param pubChemId PubChem ID(s)
#'
#' @return HTML "a" snippet(s) that can be used in HTML document directly.
pubChemHTML <- function(pert_iname, pubChemId) {
pubChemId <- unique(pubChemId)
pert_iname <- unique(pert_iname)
## choose appropriate one
id <- if_else(is.na(pubChemId), pert_iname, as.character(pubChemId))
label <- if_else(is.na(pubChemId), glue::glue("PubChem??{id}??"),
glue::glue("PubChem: {id}"))
url <- glue::glue("https://pubchem.ncbi.nlm.nih.gov/compound/{id}")
return(
paste(aHref(link = url, titleText = label), collapse = "<br/>")
)
}
#' Create sub-cellular list for iteration
#'
#' @param uniProtDataItem
#'
#' @return List of GOId-subCellLocation pairs.
uniProtSubCellular <- function(uniProtDataItem) {
uniProtDataItem$subCellularLocation %>%
whisker::iteratelist(name = "GOId", value = "subCellLocation")
}
#' Create list of molecular functions for iteration
#'
#' @param uniProtDataItem
#'
#' @return List of GOId-molecularFunction pairs.
uniProtMolecularFunction <- function(uniProtDataItem) {
uniProtDataItem$molecularFunction %>%
whisker::iteratelist(name = "GOId", value = "molecularFunction")
}
#' Create list of biological processes for iteration
#'
#' @param uniProtDataItem
#'
#' @return List of GOId-biologicalProcess pairs.
uniProtBiologicalProcess <- function(uniProtDataItem) {
uniProtDataItem$biologicalProcess %>%
whisker::iteratelist(name = "GOId", value = "biologicalProcess")
}
#' Create an "anchor" element
#'
#' @param link Parameter of "href" attribute.
#' @param titleText character content of "a" element.
#'
#' @return HTML "a" snippet that can be used in HTML document directly.
aHref <- function(link, titleText) {
return(glue::glue("<a href=\"{link}\" target=\"_blank\">{titleText}</a>"))
}
## Summary Tables ----
renderMolecularBackgroundSummary <- function(cluePatched) {
cluePatched <- cluePatched %>%
dplyr::select(
HUGO, UniProtData, NumberOfKEGGPathways, NumberOfSTRINGInteractors
) %>%
dplyr::distinct() %>%
dplyr::arrange(HUGO)
cluePatched <- cluePatched %>% dplyr::transmute(
HUGO = HUGO,
ReactomePathways = length(UniProtData$Reactome),
KEGGPathways = dplyr::if_else(is.na(NumberOfKEGGPathways), as.integer(0), as.integer(NumberOfKEGGPathways)),
STRINGInteractors = dplyr::if_else(is.na(NumberOfSTRINGInteractors), as.integer(0), as.integer(NumberOfSTRINGInteractors)),
MolecularFunctions = length(UniProtData$molecularFunction),
SubcellularLocations = length(UniProtData$subCellularLocation),
BiologicalProcesses = length(UniProtData$biologicalProcess)
)
# print and save it as CSV
print(cluePatched)
return(
readr::write_csv(cluePatched, MOLECULAR_CSV.OUTPUT)
)
}
renderCompoundsSummary <- function(cluePatched) {
## TODO: EMA counts must be added at least Launched values
cluePatched <- cluePatched %>%
dplyr::select(
HUGO,
pert_iname,
final_status,
pubchem_cid,
chembl_id,
drugbank_id,
PubMedCounter
) %>%
dplyr::distinct() %>%
dplyr::mutate(
Preclinical = stringToNumeric(final_status, 'Preclinical'),
Phase1 = stringToNumeric(final_status, 'Phase 1'),
Phase2 = stringToNumeric(final_status, 'Phase 2'),
Phase3 = stringToNumeric(final_status, 'Phase 3'),
Launched = stringToNumeric(final_status, 'Launched'),
PubMedCounter = dplyr::if_else(is.na(PubMedCounter), as.integer(0), PubMedCounter),
PubChem = NA_to_zero(dplyr::if_else(pubchem_cid != '', 1, 0)),
ChEMBL = NA_to_zero(dplyr::if_else(chembl_id != '', 1, 0)),
DrugBank = NA_to_zero(dplyr::if_else(drugbank_id != '', 1, 0))
) %>%
dplyr::group_by(HUGO) %>%
dplyr::summarise(
Preclinical = sum(Preclinical),
Phase1 = sum(Phase1),
Phase2 = sum(Phase2),
Phase3 = sum(Phase3),
Launched = sum(Launched),
PubMed = sum(PubMedCounter),
PubChem = sum(PubChem),
ChEMBL = sum(ChEMBL),
DrugBank = sum(DrugBank)
)
# print and save it as CSV
print(cluePatched)
return(
readr::write_csv(cluePatched, COMPOUNDS_CSV.OUTPUT)
)
}
overviewByFinalStatus <- function(cluePatched) {
cluePatched <- cluePatched %>%
dplyr::select(
pert_iname,
final_status
) %>%
dplyr::distinct() %>%
dplyr::mutate(
Preclinical = stringToNumeric(final_status, 'Preclinical'),
Phase1 = stringToNumeric(final_status, 'Phase 1'),
Phase2 = stringToNumeric(final_status, 'Phase 2'),
Phase3 = stringToNumeric(final_status, 'Phase 3'),
Launched = stringToNumeric(final_status, 'Launched'),
Withdrawn = stringToNumeric(final_status, 'Withdrawn')
)
saveRDS(cluePatched, 'countsByFinalStatus.rds')
countsByFinalStatus <- list(
PreclinicalCount = sum(cluePatched$Preclinical),
Phase1Count = sum(cluePatched$Phase1),
Phase2Count = sum(cluePatched$Phase2),
Phase3Count = sum(cluePatched$Phase3),
LaunchedCount = sum(cluePatched$Launched),
WithdrawnCount = sum(cluePatched$Withdrawn)
)
return(countsByFinalStatus)
}
NA_to_zero <- function(value) {
return(dplyr::if_else(is.na(value), 0, value))
}
NA_to_zero <- Vectorize(NA_to_zero)
stringToNumeric <- function(actual, expected) {
return(NA_to_zero(
dplyr::if_else(
stringr::str_ends(stringr::str_trim(actual), expected), 1, 0
)
))
}
stringToNumeric <- Vectorize(stringToNumeric)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.