#' @title
#' Scrape UMLS Documentation into Tables
#' @description
#' These functions scrape the UMLS Web Documentation to write the \code{\link{mth_documentation_functions}}. They have been kept to enable reproducibility and other long-term maintenance efforts.
#' @keywords internal
#' @importFrom xml2 read_html
#' @importFrom rvest html_nodes html_table
#' @importFrom purrr map map2 pluck
#' @importFrom stringr str_replace_all
#' @importFrom dplyr mutate_all
#' @importFrom broca make_trbl
#' @importFrom secretary press_enter
#' @name documentation_scraping_functions
#' @noRd
NULL
#' @title
#' Scrape Field Value Annotations
#' @inherit documentation_scraping_functions title description
#' @noRd
scrape_field_value_annotations <-
function() {
Sys.sleep(5)
origin <- xml2::read_html("https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/abbreviations.html")
tables <-
origin %>%
rvest::html_nodes("table") %>%
rvest::html_table()
names(tables) <-
tables %>%
purrr::map(~colnames(.)[1]) %>%
purrr::map(stringr::str_replace_all, "[\r\n\t]", " ") %>%
stringr::str_replace_all(pattern = "(^.*?)( |[(]{1})(.*$)",
replacement = "\\1")
tables <-
tables %>%
purrr::map(function(x) x %>%
dplyr::mutate_all(stringr::str_replace_all, "[\r\n\t]", " ")) %>%
purrr::map(function(x) x %>%
dplyr::mutate_all(stringr::str_replace_all, "[ ]{2,}", " ")) %>%
purrr::map(dplyr::mutate_all, as.character) %>%
purrr::map(dplyr::mutate_all, trimws, which = "both")
tribble_part <- list()
for (i in 1:length(tables)) {
tribble_part[[i]] <-
capture.output(broca::make_trbl(tables[[i]]))
}
names(tribble_part) <- names(tables)
tribble_part <-
tribble_part %>%
purrr::map(function(x) c("tibble::tribble(",
x,
")")) %>%
purrr::map(~paste(., collapse = "\n"))
# Add names
output <-
tribble_part %>%
purrr::map2(names(tribble_part),
function(x,y) paste0(y , " = ", x))
for (i in 1:length(output)) {
cat(output[[i]])
secretary::press_enter()
}
}
#' @title
#' Scrape Vocabulary Annotations
#' @inherit documentation_scraping_functions title description
#' @noRd
scrape_vocabulary_annotations <-
function() {
Sys.sleep(5)
origin <- xml2::read_html("https://www.nlm.nih.gov/research/umls/sourcereleasedocs/index.html")
tables <-
origin %>%
rvest::html_nodes("table") %>%
rvest::html_table() %>%
purrr::pluck(1)
broca::make_trbl(tables)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.