library(tidyverse)
library(pg13)
library(ggplot2)
library(amphora)
# Connection handle passed as `conn` to every pg13 staging/query call in this
# script. NOTE(review): presumably connects to a local Postgres instance that
# holds the omop_vocabulary schema -- confirm against pg13::local_connect().
conn <- pg13::local_connect()

Reverse engineering the false positives allows for identification of the OMOP Concept Classes in ATC and/or HemOnc that contain the false positive drugs, which can then be excluded from analyses. The false positive concepts are as follows:

# Start from the list of false-positive ingredients.
false_positives <- false_positive_ingredients()

# Stage the list in the "public" schema so it can be joined inside the
# database; the value returned here is what the join below uses as its table.
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  data = false_positives,
  drop_existing = TRUE
)

# Join the staged rxnorm_ingredient_id values to omop_vocabulary.concept
# on concept_id.
sql_statement <- pg13::build_join_query(
  schema = "public",
  table = staging_table_name,
  column = "rxnorm_ingredient_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept",
  join_on_column = "concept_id",
  distinct = TRUE
)

# Run the join and drop the rxnorm_ingredient_id column from the result.
false_positives <- dplyr::select(
  pg13::query(conn = conn, sql_statement = sql_statement),
  -rxnorm_ingredient_id
)

false_positives

The classification of the false positives is derived from the Concept Ancestor Table: all ancestors of the false positives are first identified, and then only those that are an OMOP Drug Class are kept.

# Stage only the id and name of each false positive, renamed with an fp_ingr_
# prefix so they stay distinct from the concept_id / concept_name columns of
# the ancestor concepts used further down.
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  data = dplyr::select(
    false_positives,
    fp_ingr_id = concept_id,
    fp_ingr_name = concept_name
  ),
  drop_existing = TRUE
)

# Join each staged ingredient to omop_vocabulary.concept_ancestor as the
# descendant, which yields one row per (ingredient, ancestor) pair.
sql_statement <- pg13::build_join_query(
  schema = "public",
  table = staging_table_name,
  column = "fp_ingr_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept_ancestor",
  join_on_column = "descendant_concept_id",
  distinct = TRUE
)

false_positives_anc <- pg13::query(conn = conn, sql_statement = sql_statement)

# Stage the (ingredient, ancestor) pairs so the ancestors can be looked up in
# the concept table.
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  data = false_positives_anc,
  drop_existing = TRUE
)

# Join every ancestor_concept_id to omop_vocabulary.concept and keep only rows
# whose standard_concept is exactly "C" (case-sensitive match).
# NOTE(review): "C" is presumably the OMOP flag for classification concepts,
# i.e. the drug classes -- confirm against the vocabulary documentation.
sql_statement <- pg13::build_join_query(
  schema = "public",
  table = staging_table_name,
  column = "ancestor_concept_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept",
  join_on_column = "concept_id",
  where_in_join_on_field = "standard_concept",
  where_in_join_on_vector = "C",
  case_insensitive = FALSE,
  distinct = TRUE
)

false_positives_anc2 <- pg13::query(conn = conn, sql_statement = sql_statement)

false_positives_anc2

The distribution of the false positives across the identified OMOP Vocabularies is shown below.

# Bar chart of the number of classification ancestors per OMOP vocabulary.
# geom_bar() counts rows per x value by default (stat = "count"). It replaces
# geom_histogram(stat = "count"), which draws the same bars but makes ggplot2
# warn that the histogram binning parameters are being ignored.
plot <- ggplot(
  data = false_positives_anc2,
  mapping = aes(x = vocabulary_id)
)
plot + geom_bar()

Focusing on the ATC and HemOnc Classifications:

# Keep only the ancestors that belong to the HemOnc or ATC vocabularies.
false_positives_anc3 <- false_positives_anc2 %>%
  filter(vocabulary_id %in% c("HemOnc", "ATC"))

# Bar chart of the remaining ancestors per vocabulary.
# geom_bar() counts rows per x value by default (stat = "count"). It replaces
# geom_histogram(stat = "count"), which draws the same bars but makes ggplot2
# warn that the histogram binning parameters are being ignored.
plot <- ggplot(
  data = false_positives_anc3,
  mapping = aes(x = vocabulary_id)
)
plot + geom_bar()

The following special relationships require additional modification before moving forward:
1. There are cases where the minimum level of separation is not equal to the maximum level of separation. Since the offset in this particular use case is 1 level, the maximum level of separation is used to determine the hierarchy.

# List the distinct (min, max) pairs for rows where the two separation
# values disagree.
false_positives_anc3 %>%
  dplyr::filter(min_levels_of_separation != max_levels_of_separation) %>%
  dplyr::select(
    dplyr::all_of(c("min_levels_of_separation", "max_levels_of_separation"))
  ) %>%
  dplyr::distinct()
  1. There are cases of 0 levels of separation between an RxNorm Ingredient and either an ATC or a HemOnc Class. These cases will be treated as 1 level of separation.
# Show the distinct (min, max, vocabulary) combinations in which either
# separation value is 0.
false_positives_anc3 %>%
  dplyr::filter(
    min_levels_of_separation == 0 | max_levels_of_separation == 0
  ) %>%
  dplyr::select(
    dplyr::all_of(
      c(
        "min_levels_of_separation",
        "max_levels_of_separation",
        "vocabulary_id"
      )
    )
  ) %>%
  dplyr::distinct()

# Collapse the two separation columns into a single levels_of_separation:
# the max is used, with the min as fallback when the max is NA, and a value
# of 0 is then bumped up to 1. The two source columns are dropped afterwards.
false_positives_anc4 <- false_positives_anc3 %>%
  dplyr::mutate(
    levels_of_separation = dplyr::coalesce(
      max_levels_of_separation,
      min_levels_of_separation
    ),
    levels_of_separation = ifelse(
      levels_of_separation == 0,
      1,
      levels_of_separation
    )
  ) %>%
  dplyr::select(-c(max_levels_of_separation, min_levels_of_separation))

false_positives_anc4

HemOnc

# Label every HemOnc row: column "0" is the false positive as "<id> <name>"
# and `ancestor` is the HemOnc class as "<id> <name>".
false_positives_anc5_ho <- false_positives_anc4 %>%
  dplyr::filter(vocabulary_id %in% "HemOnc") %>%
  tidyr::unite(
    col = "0",
    fp_ingr_id, fp_ingr_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  tidyr::unite(
    col = "ancestor",
    concept_id, concept_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  dplyr::select(dplyr::all_of(c("0", "ancestor", "levels_of_separation")))

# Spread the ancestors into one column per level of separation. Several
# ancestors at the same level are first packed into one "|"-delimited cell
# and then unpacked again into one row each, level by level.
ho_collapsible_tree <- false_positives_anc5_ho %>%
  tidyr::pivot_wider(
    id_cols = `0`,
    names_from = levels_of_separation,
    values_from = ancestor,
    values_fn = function(ancestors) paste(unique(ancestors), collapse = "|")
  ) %>%
  dplyr::select(dplyr::all_of(c("0", "1", "2", "3", "4"))) %>%
  tidyr::separate_rows(`1`, sep = "[|]{1}") %>%
  tidyr::separate_rows(`2`, sep = "[|]{1}") %>%
  tidyr::separate_rows(`3`, sep = "[|]{1}") %>%
  tidyr::separate_rows(`4`, sep = "[|]{1}")

# Build the interactive tree: ingredient at level 0, then ancestors 1 to 4.
ho_tree <- collapsibleTree::collapsibleTree(
  df = ho_collapsible_tree,
  hierarchy = c("0", "1", "2", "3", "4"),
  root = "HemOnc False Positives",
  width = 1500,
  height = 1500,
  linkLength = 250,
  collapsed = FALSE,
  zoomable = TRUE
)

# Write the widget to an HTML file under the current working directory.
htmlwidgets::saveWidget(
  widget = ho_tree,
  file = file.path(
    getwd(),
    "vignettes/widgets/reverse_engineered_false_positives_ho.html"
  )
)

ATC

# Label every ATC row: column "0" is the false positive as "<id> <name>"
# and `ancestor` is the ATC class as "<id> <name>".
false_positives_anc5_atc <- false_positives_anc4 %>%
  dplyr::filter(vocabulary_id %in% "ATC") %>%
  tidyr::unite(
    col = "0",
    fp_ingr_id, fp_ingr_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  tidyr::unite(
    col = "ancestor",
    concept_id, concept_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  dplyr::select(dplyr::all_of(c("0", "ancestor", "levels_of_separation")))

Though ATC has 5 levels, only 4 are chosen since the topmost level (5 in this use case) is too general for us to use as a blanket exclusion criterion.

# Spread the ATC ancestors into one column per level of separation and keep
# the ingredient ("0") together with levels 2 and 3. Several ancestors at the
# same level are packed into one "|"-delimited cell and then unpacked into one
# row each; duplicate rows are removed at the end.
atc_collapsible_tree <- false_positives_anc5_atc %>%
  tidyr::pivot_wider(
    id_cols = `0`,
    names_from = levels_of_separation,
    values_from = ancestor,
    values_fn = function(ancestors) paste(unique(ancestors), collapse = "|")
  ) %>%
  dplyr::select(dplyr::all_of(c("0", "2", "3"))) %>%
  tidyr::separate_rows(`2`, sep = "[|]{1}") %>%
  tidyr::separate_rows(`3`, sep = "[|]{1}") %>%
  dplyr::distinct()

# Build the interactive tree: ingredient first, then the level 2 and level 3
# ancestors.
atc_tree <- collapsibleTree::collapsibleTree(
  df = atc_collapsible_tree,
  hierarchy = c("0", "2", "3"),
  root = "ATC False Positives",
  width = 1250,
  height = 22000,
  linkLength = 350,
  collapsed = FALSE,
  zoomable = TRUE
)

# Write the widget to an HTML file under the current working directory.
htmlwidgets::saveWidget(
  widget = atc_tree,
  file = file.path(
    getwd(),
    "vignettes/widgets/reverse_engineered_false_positives_atc.html"
  )
)

# Staging-table cleanup is left disabled:
# pg13::drop_all_staging_tables(conn = conn,
#                               schema = "public")

# Done with the database. NOTE(review): dc() presumably disconnects `conn` --
# confirm against the pg13 documentation.
pg13::dc(conn = conn)


meerapatelmd/oldamphora documentation built on Jan. 4, 2021, 12:33 a.m.