R/metadata.R

Defines functions get_vocab_data_dictionary get_concept_id_fields get_cdm_data_dictionary list_concept_id_fields list_table_abbr list_vocab_table_names list_cdm_table_names read_cdm_wiki_table

Documented in get_cdm_data_dictionary get_concept_id_fields get_vocab_data_dictionary list_cdm_table_names list_concept_id_fields list_table_abbr list_vocab_table_names read_cdm_wiki_table

#' @title
#' Read GitHub Wiki
#' @description
#' Read the tables from OHDI's GitHub Wiki at
#' \url{https://ohdsi.github.io/CommonDataModel/cdm60.html} to annotate
#' resultsets. This function is run on package load to cache the tables if they
#' are older than the expiration days.
#' @param expiration_days If the cached file was created longer than this time
#' period in days, the GitHub wiki is re-read and cached. Default: 180
#' @seealso
#'  \code{\link[R.cache]{findCache}}
#'  \code{\link[xml2]{read_xml}}
#'  \code{\link[rvest]{html_nodes}},\code{\link[rvest]{html_table}},\code{\link[rvest]{html_text}}
#' @rdname read_cdm_wiki_table
#' @export
#' @importFrom R.cache findCache
#' @importFrom xml2 read_html
#' @importFrom rvest html_nodes html_table html_text

read_cdm_wiki_table <-
  function(expiration_days = 180) {

    cache_file <-
    R.cache::findCache(key = list("read_cdm_wiki_table"),
                       dirs = "chariot")

    if (!is.null(cache_file)) {
    if (as.double(difftime(Sys.time(),file.info(cache_file)$mtime, units = "days")) >= 180) {

        response <- xml2::read_html("https://ohdsi.github.io/CommonDataModel/cdm60.html")

        data <-
          response %>%
          rvest::html_nodes("table") %>%
          rvest::html_table(fill = TRUE)

        names(data) <-
          response %>%
          rvest::html_nodes("h3") %>%
          rvest::html_text()

        lowLevelCache(data = data,
                      query = "read_cdm_wiki_table")

        return(data)

    } else {
       data <- lowLevelLoadCache(query = "read_cdm_wiki_table")

       data


    }
    } else {

      response <- xml2::read_html("https://ohdsi.github.io/CommonDataModel/cdm60.html")

      data <-
        response %>%
        rvest::html_nodes("table") %>%
        rvest::html_table(fill = TRUE)

      names(data) <-
        response %>%
        rvest::html_nodes("h3") %>%
        rvest::html_text()

      lowLevelCache(data = data,
                    query = "read_cdm_wiki_table")

      return(data)
    }
  }



#' @title
#' List the Common Data Model Tables
#' @description
#' List the names of the Common Data Model tables without the Vocabulary tables.
#' @rdname list_cdm_table_names
#' @export
list_cdm_table_names <-
  function() {
    c("PERSON", "OBSERVATION_PERIOD", "VISIT_OCCURRENCE", "VISIT_DETAIL", "CONDITION_OCCURRENCE", "DRUG_EXPOSURE", "PROCEDURE_OCCURRENCE", "DEVICE_EXPOSURE", "MEASUREMENT", "OBSERVATION", "NOTE", "NOTE_NLP", "SPECIMEN", "FACT_RELATIONSHIP", "SURVEY_CONDUCT", "LOCATION", "LOCATION_HISTORY", "CARE_SITE", "PROVIDER", "PAYER_PLAN_PERIOD", "COST", "DRUG_ERA", "DOSE_ERA", "CONDITION_ERA", "METADATA", "CDM_SOURCE")
  }


#' @title
#' List the Vocabulary Tables
#' @rdname list_vocab_table_names
#' @export
list_vocab_table_names <-
  function() {
    c("CONCEPT", "VOCABULARY", "DOMAIN", "CONCEPT_CLASS", "CONCEPT_RELATIONSHIP", "RELATIONSHIP", "CONCEPT_SYNONYM", "CONCEPT_ANCESTOR", "SOURCE_TO_CONCEPT_MAP", "DRUG_STRENGTH")
  }

#' @title
#' Get OMOP Table Abbreviations
#'
#' @return
#' Named vector of abbreviations for all OMOP Tables generated by \code{\link[base]{abbreviate}}.
#'
#' @seealso
#'  \code{\link[stringr]{str_replace}}
#' @rdname list_table_abbr
#' @export
#' @importFrom stringr str_replace_all
#' @family list functions

list_table_abbr <-
  function() {
    allTables <- c(list_cdm_table_names(),
                  list_vocab_table_names())
    allTableNames <- stringr::str_replace_all(
      allTables,
      pattern = "[_]{1}",
      replacement = " "
    )
    output <- tolower(abbreviate(allTableNames, minlength = 1L))
    names(output) <- allTables
    output
  }


#' @title
#' Get CDM Table Concept Id Fields
#'
#' @return
#' Tibble of the concept id fields for each CDM Table
#'
#' @importFrom tibble tribble
#' @export
#' @rdname list_concept_id_fields


list_concept_id_fields <-
        function(...) {

                x <-
                        list(
                                ATTRIBUTE_DEFINITION = ("attribute_type_concept_id"),
                                CARE_SITE = c("place_of_service_concept_id"),
                                COHORT = c("drug_concept_id"),
                                COHORT_ATTRIBUTE = c("value_as_concept_id"),
                                COHORT_DEFINITION = c("definition_type_concept_id", "subject_concept_id"),
                                CONDITION_ERA = c("condition_concept_id"),
                                CONDITION_OCCURRENCE = c("condition_concept_id", "condition_type_concept_id", "condition_source_concept_id", "condition_status_concept_id"),
                                COST = c("cost_type_concept_id", "currency_concept_id", "revenue_code_concept_id", "drg_concept_id"),
                                DEATH = c("death_type_concept_id", "cause_concept_id", "cause_source_concept_id"),
                                DEVICE_EXPOSURE = c("device_concept_id", "device_type_concept_id", "device_source_concept_id"),
                                DOSE_ERA = c("drug_concept_id", "unit_concept_id"),
                                DRUG_ERA = c("drug_concept_id"),
                                DRUG_EXPOSURE = c("drug_concept_id", "drug_type_concept_id", "route_concept_id", "drug_source_concept_id"),
                                EPISODE = c("episode_concept_id", "episode_object_concept_id", "episode_type_concept_id", "episode_source_concept_id"),
                                EPISODE_EVENT = c("episode_event_field_concept_id"),
                                FACT_RELATIONSHIP = c("domain_concept_id_1", "domain_concept_id_2", "relationship_concept_id"),
                                MEASUREMENT = c("measurement_concept_id", "measurement_type_concept_id", "operator_concept_id", "value_as_concept_id", "unit_concept_id", "measurement_source_concept_id", "modifier_of_field_concept_id"),
                                METADATA = c("metadata_concept_id", "metadata_type_concept_id", "value_as_concept_id"),
                                NOTE = c("note_type_concept_id", "note_class_concept_id", "encoding_concept_id", "language_concept_id"),
                                NOTE_NLP = c("section_concept_id", "note_nlp_concept_id", "note_nlp_source_concept_id"),
                                OBSERVATION = c("observation_concept_id", "observation_type_concept_id", "value_as_concept_id", "qualifier_concept_id", "unit_concept_id", "observation_source_concept_id"),
                                OBSERVATION_PERIOD = c("period_type_concept_id"),
                                PAYER_PLAN_PERIOD = c("payer_concept_id", "payer_source_concept_id", "plan_concept_id", "plan_source_concept_id", "sponsor_concept_id", "sponsor_source_concept_id", "stop_reason_concept_id", "stop_reason_source_concept_id"),
                                PERSON = c("gender_concept_id", "race_concept_id", "ethnicity_concept_id", "gender_source_concept_id", "race_source_concept_id", "ethnicity_source_concept_id"),
                                PROCEDURE_OCCURRENCE = c("procedure_concept_id", "procedure_type_concept_id", "modifier_concept_id", "procedure_source_concept_id"),
                                PROVIDER = c("specialty_concept_id", "gender_concept_id", "specialty_source_concept_id", "gender_source_concept_id"),
                                REGIMEN = c("drug_concept_id"),
                                SOURCE_TO_CONCEPT_MAP = c("source_concept_id", "target_concept_id"),
                                SPECIMEN = c("specimen_concept_id", "specimen_type_concept_id", "unit_concept_id", "anatomic_site_concept_id", "disease_status_concept_id"),
                                VISIT_DETAIL = c("visit_detail_concept_id", "visit_detail_type_concept_id", "admitting_source_concept_id", "discharge_to_concept_id", "visit_detail_source_concept_id"),
                                VISIT_OCCURRENCE = c("visit_concept_id", "visit_type_concept_id", "visit_source_concept_id", "admitting_source_concept_id", "discharge_to_concept_id")
                        )


                if (missing(...)) {

                        x

                } else {

                        x[names(x) %in% unlist(rlang::list2(...))]
                }


        }

#' @title
#' Get the CDM Data Dictionary
#' @seealso
#'  \code{\link[dplyr]{bind}}
#'  \code{\link[rubix]{format_colnames}}
#' @rdname get_cdm_data_dictionary
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom rubix format_colnames
get_cdm_data_dictionary <-
  function() {
      omop_cdm_wiki <- read_cdm_wiki_table()
      cdm_table_names <- list_cdm_table_names()
      omop_cdm_wiki[cdm_table_names] %>%
        dplyr::bind_rows(.id = "table") %>%
        rubix::format_colnames()
  }

#' @title
#' Get the Concept Id Field Names for Each Table
#' @seealso
#'  \code{\link[rubix]{filter_at_grepl}},\code{\link[rubix]{split_deselect}}
#'  \code{\link[dplyr]{select}}
#'  \code{\link[purrr]{map}}
#' @rdname get_concept_id_fields
#' @export
#' @importFrom rubix filter_at_grepl split_deselect
#' @importFrom dplyr select
#' @importFrom purrr map
get_concept_id_fields <-
  function() {
    cdm_metadata <- get_cdm_data_dictionary()
    cdm_metadata %>%
      rubix::filter_at_grepl(col = cdm_field,
                             grepl_phrase = "concept_id") %>%
      dplyr::select(table, cdm_field) %>%
      rubix::split_deselect(col = table) %>%
      purrr::map(unlist) %>%
      purrr::map(unname)
  }

#' @title
#' Get the Vocabulary Data Dictionary
#' @seealso
#'  \code{\link[dplyr]{bind}}
#'  \code{\link[rubix]{format_colnames}}
#' @rdname get_vocab_data_dictionary
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom rubix format_colnames
get_vocab_data_dictionary <-
  function() {
    omop_cdm_wiki <- read_cdm_wiki_table()
    table_names <- list_vocab_table_names()
    omop_cdm_wiki[table_names] %>%
      dplyr::bind_rows(.id = "Table") %>%
      rubix::format_colnames()
  }
patelm9/chariot documentation built on Feb. 19, 2022, 11:29 a.m.