#' Writes Athena CONCEPT table NET
#'
#' For every phrase, the phrase is normalised, looked up in the Athena lookup
#' csv and, when it is not already listed there, queried against
#' `public.concept` (`concept_name LIKE '%phrase%'`) in the local `athena`
#' Postgres database. The query result is written to its own csv in the net
#' directory and a summary row is appended to the lookup csv.
#'
#' @param phrases character vector of length 1 or greater of queries. A
#'   zero-length vector results in no queries being run.
#' @param path_to_athena_lookup path to lookup table. The file is re-read at
#'   the start of every iteration and new rows are appended to this same file.
#' @param path_to_net_dir path to the directory that receives one csv per
#'   newly queried phrase.
#' @param sub_forward_slash if TRUE, every forward slash in a phrase is
#'   replaced with a space before querying.
#' @param trim_inner_space if TRUE, the phrase is passed through `trimis()`
#'   before leading/trailing whitespace is removed (presumably collapses
#'   repeated inner spaces; `trimis()` is defined outside this file).
#' @param sub_underscore if TRUE, every underscore in a phrase is replaced
#'   with a space before querying.
#' @param id_start_index if number of rows of the lookup is 0, the starting index for the id number assigned
#' @param path_to_repo if not NULL, will add, commit, and push each iterative change to origin master of the repo
#' @param return_lookup if TRUE, the lookup table is re-read after the last
#'   phrase and returned.
#' @return csv files for each unique phrase queried in concept table and, at
#'   the conclusion of the iteration, the most updated Athena lookup (all
#'   columns read as character) when `return_lookup` is TRUE; otherwise
#'   `NULL`, invisibly.
#' @import readr
#' @import dplyr
#' @import mirCat
#' @importFrom typewriteR tell_me
#' @importFrom crayon cyan
#' @importFrom mySeagull connect_to_local_postgres
#' @import DBI
#' @importFrom mirroR create_path_to_file
#' @export
#'
#'
write_athena_net <-
  function(phrases,
           path_to_athena_lookup = "/Users/meerapatel/GitHub/MSK_KMI_Enterprise/biblio-tech/CATALOGUE/Athena_Vocabulary_v5/LOOKUP.csv",
           path_to_net_dir = "/Users/meerapatel/GitHub/MSK_KMI_Enterprise/biblio-tech/CATALOGUE/Athena_Vocabulary_v5/NETS",
           sub_forward_slash = TRUE,
           trim_inner_space = TRUE,
           sub_underscore = TRUE,
           id_start_index = 60000,
           path_to_repo = NULL,
           return_lookup = TRUE) {
    total_obs <- length(phrases)

    # seq_along() (not 1:length()) so that an empty `phrases` runs zero
    # iterations instead of iterating over c(1, 0).
    for (i in seq_along(phrases)) {
      # Re-read on every pass: the previous iteration may have appended a row.
      athena_lookup <- readr::read_csv(
        path_to_athena_lookup,
        col_types = readr::cols(.default = "c")
      )

      # Normalise the phrase according to the requested options.
      phrase_00 <- phrases[i]
      phrase_01 <- phrase_00
      if (isTRUE(sub_forward_slash)) {
        phrase_01 <- stringr::str_replace_all(phrase_01, stringr::fixed("/"), " ")
      }
      if (isTRUE(sub_underscore)) {
        phrase_01 <- stringr::str_replace_all(phrase_01, stringr::fixed("_"), " ")
      }
      if (isTRUE(trim_inner_space)) {
        phrase_01 <- trimis(phrase_01)
      }
      phrase_01 <- trimws(phrase_01, "both")

      typewriteR::tell_me("Starting", i, "of", crayon::cyan(total_obs))
      typewriteR::tell_me("Native Phrase:", crayon::cyan(phrase_00))
      typewriteR::tell_me("Processed Phrase:", crayon::cyan(phrase_01))

      # Phrases already present in the lookup are not queried again.
      if (phrase_01 %in% athena_lookup$ATHENA_SQL_KEYWORD) {
        next
      }

      # New id: the start index for an empty lookup, otherwise the current
      # maximum id plus a random increment between 1 and 9.
      if (nrow(athena_lookup) == 0) {
        athena_sql_keyword_id <- id_start_index
      } else {
        athena_sql_keyword_id <-
          max(as.integer(athena_lookup$ATHENA_SQL_KEYWORD_ID)) + sample(1:9, 1)
      }
      brake_if_na(athena_sql_keyword_id)

      net_basename <- paste0(athena_sql_keyword_id, "_", phrase_01)

      conn_to_athena <- mySeagull::connect_to_local_postgres(dbname = "athena")
      output <- tryCatch(
        {
          # Let DBI quote the pattern so that phrases containing quotes
          # cannot break or alter the statement.
          sql_statement <- DBI::sqlInterpolate(
            conn_to_athena,
            "SELECT * FROM public.concept WHERE concept_name LIKE ?pattern;",
            pattern = paste0("%", phrase_01, "%")
          )
          DBI::dbGetQuery(conn_to_athena, sql_statement)
        },
        # Always release the connection, even when the query fails.
        finally = DBI::dbDisconnect(conn_to_athena)
      )

      # Write the net for this phrase into the requested net directory.
      output_fn <- mirroR::create_path_to_file(
        path_folder = path_to_net_dir,
        basename = net_basename,
        file_extension = "csv"
      )
      # Destination passed positionally: the argument was renamed from
      # `path` to `file` across readr versions.
      readr::write_csv(output, output_fn)

      # Record the new net in the same lookup file that was read above.
      lookup_row <- dplyr::tibble(
        ATHENA_SQL_KEYWORD_TIMESTAMP = mirroR::get_timestamp(),
        ATHENA_SQL_KEYWORD_ID = athena_sql_keyword_id,
        ATHENA_SQL_KEYWORD = phrase_01,
        NET_CUI_COUNT = length(unique(output$concept_code)),
        NET_ROW_COUNT = nrow(output)
      )
      mirCat::append_csv(
        csv_fn = path_to_athena_lookup,
        dataframe = somersaulteR::call_mr_clean(lookup_row)
      )

      if (!is.null(path_to_repo)) {
        mirCat::git_add_all(path_to_repo)
        mirCat::git_commit(path_to_repo,
                           commit_message = paste0("+: ", net_basename))
        mirCat::git_push_to_msk(path_to_repo)
      }

      typewriteR::tell_me(i, "of", crayon::cyan(total_obs), "completed.")
      cat("\n\n\n")
    }

    if (isTRUE(return_lookup)) {
      readr::read_csv(path_to_athena_lookup,
                      col_types = readr::cols(.default = "c"))
    }
  }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.