# R/write_radlex_lookup_only.R
#
# Defines functions: write_radlex_lookup_only
#
# Documented in: write_radlex_lookup_only

#' Append new phrases to the RadLex lookup table
#'
#' Iterates over `phrases`, normalises each one and, when the normalised
#' phrase is not already present in the `RADLEX_SQL_KEYWORD` column of the
#' lookup CSV, appends one row for it to that CSV. Phrases that are already
#' present are skipped without writing anything.
#'
#' @details
#' Normalisation is applied in this order: optional replacement of `/` with a
#' space, optional replacement of `_` with a space, optional `trimis()`, and
#' finally `trimws(which = "both")`.
#'
#' Each appended row has the columns `RADLEX_SQL_KEYWORD_TIMESTAMP`,
#' `RADLEX_SQL_KEYWORD_ID`, `RADLEX_SQL_KEYWORD`, `COORDINATE_CUI`,
#' `COORDINATE_STR`, `NET_CUI_COUNT` and `NET_ROW_COUNT`. The two `COORDINATE_*`
#' values are taken from the first row returned by `strictly_ask_radlex()`;
#' the two `NET_*` counts are computed from the result of
#' `loosely_ask_radlex()`.
#'
#' The id of a new row is `id_start_index` when the lookup has no rows,
#' otherwise the current maximum `RADLEX_SQL_KEYWORD_ID` plus a random
#' increment between 1 and 9.
#'
#' @param phrases character vector of queries. A zero-length vector is accepted
#'   and results in no phrase being processed.
#' @param path_to_radlex_lookup path to the lookup table (a CSV file that is
#'   read, and appended to, by this function)
#' @param path_to_radlex_xls path handed unchanged to `loosely_ask_radlex()`
#'   and `strictly_ask_radlex()`
#' @param sub_forward_slash if TRUE, every `/` in a phrase is replaced with a
#'   space
#' @param trim_inner_space if TRUE, the phrase is passed through `trimis()`
#'   before leading and trailing whitespace is trimmed
#' @param sub_underscore if TRUE, every `_` in a phrase is replaced with a
#'   space
#' @param id_start_index if number of rows of the lookup is 0, the starting
#'   index for the id number assigned
#' @param path_to_repo if not NULL, each appended row is followed by
#'   `mirCat::git_add_all()`, `mirCat::git_commit()` and
#'   `mirCat::git_push_to_msk()` on this repo
#' @param return_lookup if TRUE, the lookup table is read once more after all
#'   phrases have been processed and returned
#' @return If `return_lookup` is TRUE, the lookup table as read by
#'   `readr::read_csv()` with every column typed as character; otherwise
#'   `NULL`, invisibly.
#' @import readr
#' @import dplyr
#' @import mirCat
#' @importFrom typewriteR tell_me
#' @importFrom rubix call_mr_clean
#' @importFrom crayon cyan
#' @import DBI
#' @importFrom mirroR create_path_to_file
#' @importFrom projektoR append_csv
#' @export
write_radlex_lookup_only <-
        function(phrases,
                 path_to_radlex_lookup,
                 path_to_radlex_xls,
                 sub_forward_slash = TRUE,
                 trim_inner_space = TRUE,
                 sub_underscore = TRUE,
                 id_start_index = 9000,
                 path_to_repo = NULL,
                 return_lookup = TRUE) {
                # Single place that defines how the lookup CSV is read: every
                # column comes back as character.
                read_lookup <- function() {
                        readr::read_csv(path_to_radlex_lookup,
                                        col_types = readr::cols(.default = "c"))
                }

                total_obs <- length(phrases)

                # seq_along() rather than 1:length(): with a zero-length
                # `phrases` the loop body must not run at all, whereas
                # 1:0 would iterate over c(1, 0).
                for (i in seq_along(phrases)) {
                        # Re-read on every pass because the previous pass may
                        # have appended a row to the file.
                        radlex_lookup <- read_lookup()

                        phrase_00 <- phrases[i]
                        phrase_01 <- phrase_00

                        if (sub_forward_slash) {
                                phrase_01 <- stringr::str_replace_all(phrase_01, "[/]{1}", " ")
                        }

                        if (sub_underscore) {
                                phrase_01 <- stringr::str_replace_all(phrase_01, "[_]{1}", " ")
                        }

                        if (trim_inner_space) {
                                # NOTE(review): trimis() is defined outside this
                                # file; presumably it collapses inner
                                # whitespace -- confirm.
                                phrase_01 <- trimis(phrase_01)
                        }

                        phrase_01 <- trimws(phrase_01, "both")

                        typewriteR::tell_me("Starting", i, "of", crayon::cyan(total_obs))
                        typewriteR::tell_me("Native Phrase:", crayon::cyan(phrase_00))
                        typewriteR::tell_me("Processed Phrase:", crayon::cyan(phrase_01))

                        # Already recorded: nothing to query or write.
                        if (phrase_01 %in% radlex_lookup$RADLEX_SQL_KEYWORD) {
                                next
                        }

                        if (nrow(radlex_lookup) == 0) {
                                radlex_sql_keyword_id <- id_start_index
                        } else {
                                # Current maximum id plus a random step of 1-9.
                                radlex_sql_keyword_id <-
                                        max(as.integer(radlex_lookup$RADLEX_SQL_KEYWORD_ID)) + sample(1:9, 1)
                        }

                        # NOTE(review): brake_if_na() is defined outside this
                        # file; presumably it halts when the id is NA (e.g. a
                        # non-numeric id in the CSV) -- confirm.
                        brake_if_na(radlex_sql_keyword_id)

                        output <- loosely_ask_radlex(phrase = phrase_01,
                                                     path_to_radlex_xls = path_to_radlex_xls)

                        output2 <- strictly_ask_radlex(phrase = phrase_01,
                                                       path_to_radlex_xls = path_to_radlex_xls)

                        new_row <-
                                dplyr::tibble(RADLEX_SQL_KEYWORD_TIMESTAMP = mirroR::get_timestamp(),
                                              RADLEX_SQL_KEYWORD_ID = radlex_sql_keyword_id,
                                              RADLEX_SQL_KEYWORD = phrase_01,
                                              COORDINATE_CUI = output2$`http://data.bioontology.org/metadata/prefixIRI`[1],
                                              COORDINATE_STR = output2$`Preferred Label`[1],
                                              NET_CUI_COUNT = length(unique(output$`http://data.bioontology.org/metadata/prefixIRI`)),
                                              NET_ROW_COUNT = nrow(output))

                        projektoR::append_csv(csv_fn = path_to_radlex_lookup,
                                              dataframe = rubix::call_mr_clean(new_row))

                        if (!is.null(path_to_repo)) {
                                mirCat::git_add_all(path_to_repo)
                                mirCat::git_commit(path_to_repo,
                                                   commit_message = paste0("+: ", radlex_sql_keyword_id, "_", phrase_01))
                                mirCat::git_push_to_msk(path_to_repo)
                        }

                        typewriteR::tell_me(i, "of", crayon::cyan(total_obs), "completed.")
                        cat("\n\n\n")
                }

                if (return_lookup) {
                        read_lookup()
                } else {
                        invisible(NULL)
                }
        }
# patelm9/cartographR2 documentation built on Dec. 24, 2019, 3:03 p.m.