# Packages attached for this analysis (order preserved from the original).
library(tidyverse)
library(pg13)
library(ggplot2)
library(amphora)

# Database connection handle; it is passed as `conn` to every pg13 call below.
conn <- pg13::local_connect()
Reverse engineering the false positives allows for the identification of the OMOP Concept Classes in ATC and/or HemOnc that contain the false positive drugs, which can then be excluded from analyses. The false positive concepts are as follows:
# Starting set of false positive ingredients.
# NOTE(review): false_positive_ingredients() is not defined in this file;
# presumably it comes from one of the attached packages -- confirm.
false_positives <- false_positive_ingredients()

# Write the ingredients to a staging table in the "public" schema; the
# returned value is used as the table name in the join below.
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  drop_existing = TRUE,
  data = false_positives
)

# Build the query that joins the staged rxnorm_ingredient_id values to
# omop_vocabulary.concept on concept_id.
sql_statement <- pg13::build_join_query(
  distinct = TRUE,
  schema = "public",
  table = staging_table_name,
  column = "rxnorm_ingredient_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept",
  join_on_column = "concept_id"
)

# Run the join, then drop the staged key column from the result.
false_positives <- pg13::query(conn = conn, sql_statement = sql_statement)
false_positives <- dplyr::select(false_positives, -rxnorm_ingredient_id)

false_positives
The classification of the false positives is derived from the Concept Ancestor Table: all ancestors of the false positives are first identified, and are then retained only if they are an OMOP Drug Class.
# Step 1: stage the false positive ingredients under fp_-prefixed column
# names so they stay distinguishable from the ancestor concept columns
# joined in later.
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  drop_existing = TRUE,
  data = select(
    false_positives,
    fp_ingr_id = concept_id,
    fp_ingr_name = concept_name
  )
)

# Step 2: join the staged ingredients to omop_vocabulary.concept_ancestor,
# matching fp_ingr_id against descendant_concept_id.
sql_statement <- pg13::build_join_query(
  distinct = TRUE,
  schema = "public",
  table = staging_table_name,
  column = "fp_ingr_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept_ancestor",
  join_on_column = "descendant_concept_id"
)
false_positives_anc <- pg13::query(conn = conn, sql_statement = sql_statement)

# Step 3: stage the ancestor rows and join them to omop_vocabulary.concept
# on ancestor_concept_id, restricted to rows whose standard_concept is "C"
# (matched case-sensitively).
staging_table_name <- pg13::write_staging_table(
  conn = conn,
  schema = "public",
  drop_existing = TRUE,
  data = false_positives_anc
)

sql_statement <- pg13::build_join_query(
  distinct = TRUE,
  schema = "public",
  table = staging_table_name,
  column = "ancestor_concept_id",
  join_on_schema = "omop_vocabulary",
  join_on_table = "concept",
  join_on_column = "concept_id",
  where_in_join_on_field = "standard_concept",
  case_insensitive = FALSE,
  where_in_join_on_vector = "C"
)
false_positives_anc2 <- pg13::query(conn = conn, sql_statement = sql_statement)

false_positives_anc2
The distribution of the false positives across the identified OMOP Vocabularies is shown below:
# Bar chart of the number of rows in false_positives_anc2 per vocabulary_id.
# geom_bar() counts rows per discrete x value by default, which is what the
# previous geom_histogram(stat = "count") was being coerced into doing (at
# the cost of a warning about ignored binning parameters).
plot <- ggplot(data = false_positives_anc2, aes(x = vocabulary_id))
plot + geom_bar()
Focusing on the ATC and HemOnc Classifications:
# Keep only the ancestor rows that belong to the HemOnc or ATC vocabularies.
false_positives_anc3 <- filter(
  false_positives_anc2,
  vocabulary_id %in% c("HemOnc", "ATC")
)
# Bar chart of the number of rows in false_positives_anc3 per vocabulary_id.
# geom_bar() is the count-based geom for a discrete x; it replaces
# geom_histogram(stat = "count"), which drew the same bars but warned about
# ignored binning parameters.
plot <- ggplot(data = false_positives_anc3, aes(x = vocabulary_id))
plot + geom_bar()
The following special relationships require additional modification before moving forward:
1. There are cases where the minimum levels of separation are not equal to the maximum levels of separation. Since this particular use case indicates an offset of 1 level, the maximum levels of separation are used to determine the hierarchy.
# Show the distinct (min, max) pairs for rows where the two separation
# measures disagree.
false_positives_anc3 %>%
  filter(min_levels_of_separation != max_levels_of_separation) %>%
  distinct(min_levels_of_separation, max_levels_of_separation)
# Show the distinct (min, max, vocabulary) combinations in which either
# separation measure is 0.
false_positives_anc3 %>%
  filter(min_levels_of_separation == 0 | max_levels_of_separation == 0) %>%
  distinct(
    min_levels_of_separation,
    max_levels_of_separation,
    vocabulary_id
  )
# Collapse the two separation measures into a single levels_of_separation
# column:
#   * prefer the max value, falling back to the min value when max is NA;
#   * bump a level of 0 up to 1.
# The two source columns are dropped afterwards.
false_positives_anc4 <- false_positives_anc3 %>%
  mutate(
    levels_of_separation = coalesce(
      max_levels_of_separation,
      min_levels_of_separation
    ),
    levels_of_separation = ifelse(
      levels_of_separation == 0,
      1,
      levels_of_separation
    )
  ) %>%
  select(-max_levels_of_separation, -min_levels_of_separation)

false_positives_anc4
# HemOnc subset reshaped into (ingredient label, ancestor label, level):
#   * column `0` is "<fp_ingr_id> <fp_ingr_name>" (the tree's leaf level);
#   * `ancestor` is "<concept_id> <concept_name>" of the classification.
false_positives_anc5_ho <- false_positives_anc4 %>%
  filter(vocabulary_id == "HemOnc") %>%
  unite(
    col = "0",
    fp_ingr_id, fp_ingr_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  unite(
    col = ancestor,
    concept_id, concept_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  select(`0`, ancestor, levels_of_separation)
# One row per ingredient with one column per level of separation; multiple
# ancestors at the same level are first packed into a single "|"-delimited
# string, then unpacked level by level so every ingredient/ancestor path
# gets its own row.
ho_collapsible_tree <- false_positives_anc5_ho %>%
  pivot_wider(
    id_cols = `0`,
    names_from = levels_of_separation,
    values_from = ancestor,
    values_fn = function(x) paste(unique(x), collapse = "|")
  ) %>%
  # Columns "0" through "4" must all exist; all_of() errors otherwise.
  select(all_of(as.character(0:4))) %>%
  # The levels are split one column at a time (not in a single call) so the
  # rows expand as a cross product of the ancestors at each level.
  separate_rows(`1`, sep = "[|]{1}") %>%
  separate_rows(`2`, sep = "[|]{1}") %>%
  separate_rows(`3`, sep = "[|]{1}") %>%
  separate_rows(`4`, sep = "[|]{1}")
# Interactive tree of the HemOnc hierarchy, using columns "0".."4" as the
# successive levels beneath the root label.
ho_tree <- collapsibleTree::collapsibleTree(
  df = ho_collapsible_tree,
  hierarchy = as.character(0:4),
  root = "HemOnc False Positives",
  linkLength = 250,
  width = 1500,
  height = 1500,
  zoomable = TRUE,
  collapsed = FALSE
)

# Save the widget as an HTML file under the current working directory.
htmlwidgets::saveWidget(
  widget = ho_tree,
  file = file.path(
    getwd(),
    "vignettes/widgets/reverse_engineered_false_positives_ho.html"
  )
)
# ATC subset reshaped into (ingredient label, ancestor label, level):
#   * column `0` is "<fp_ingr_id> <fp_ingr_name>" (the tree's leaf level);
#   * `ancestor` is "<concept_id> <concept_name>" of the classification.
false_positives_anc5_atc <- false_positives_anc4 %>%
  filter(vocabulary_id == "ATC") %>%
  unite(
    col = "0",
    fp_ingr_id, fp_ingr_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  unite(
    col = ancestor,
    concept_id, concept_name,
    sep = " ",
    na.rm = TRUE
  ) %>%
  select(`0`, ancestor, levels_of_separation)
Though ATC has 5 levels, only 4 are chosen since the topmost level (5 in this use case) is too general for us to use as a blanket exclusion criterion.
# One row per ingredient with one column per level of separation; multiple
# ancestors at the same level are packed into a "|"-delimited string and
# then unpacked again so each ingredient/ancestor path gets its own row.
atc_collapsible_tree <- false_positives_anc5_atc %>%
  pivot_wider(
    id_cols = `0`,
    names_from = levels_of_separation,
    values_from = ancestor,
    values_fn = function(x) paste(unique(x), collapse = "|")
  ) %>%
  # Only the ingredient column and levels 2 and 3 are carried into the tree.
  select(`0`, `2`, `3`) %>%
  # Split one column at a time so rows expand as a cross product.
  separate_rows(`2`, sep = "[|]{1}") %>%
  separate_rows(`3`, sep = "[|]{1}") %>%
  # Dropping the other levels can leave duplicate paths; remove them.
  distinct()
# Interactive tree of the ATC hierarchy, using columns "0", "2" and "3" as
# the successive levels beneath the root label.
atc_tree <- collapsibleTree::collapsibleTree(
  df = atc_collapsible_tree,
  hierarchy = c("0", "2", "3"),
  root = "ATC False Positives",
  linkLength = 350,
  width = 1250,
  height = 22000,
  zoomable = TRUE,
  collapsed = FALSE
)

# Save the widget as an HTML file under the current working directory.
htmlwidgets::saveWidget(
  widget = atc_tree,
  file = file.path(
    getwd(),
    "vignettes/widgets/reverse_engineered_false_positives_atc.html"
  )
)
# Staging-table cleanup, left commented out as in the original:
# pg13::drop_all_staging_tables(conn = conn,
#                               schema = "public")

# Release the database connection. This call must sit on its own line: when
# it shares a line with the commented-out call above, it is swallowed by the
# leading `#` and never runs.
# NOTE(review): pg13::dc() is presumably the package's disconnect helper --
# confirm against the pg13 documentation.
pg13::dc(conn = conn)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.