R/accessory_functions.R
In CEDARS: Simple and Efficient Pipeline for Electronic Health Record Annotation

Documented in colorize find_model find_project_name format_keywords sanitize sanitize_query save_credentials standardize_nlp start_local

# Sundry functions


#' Sanitize Text
#'
#' Removes non-ASCII characters and '*' from clinical corpora or user input.
#' @param text Raw text from the EHR.
#' @return Cleaned text in ASCII format stripped of '*' characters.
#' @keywords internal

sanitize <- function(text) {

    text <- gsub("[^\\x00-\\x7F]", " ", text, perl = TRUE)
    text <- gsub("\\*", " ", text)
    text

}


#' Sanitize Query
#'
#' Removes non-ASCII characters from search query and attempts to correct formulation errors.
#' @param query Raw text from the query.
#' @return Cleaned query.
#' @keywords internal

sanitize_query <- function(query) {

    query <- gsub("[^\\x00-\\x7F]", " ", query, perl = TRUE)
    query <- gsub("\\*+", "*", query)
    query <- gsub("\\(+", "(", query)
    query <- gsub("\\)+", ")", query)
    query

}


#' Format Keyword Elements
#'
#' Prepares keyword element vector, converting wildcards to Regex syntax. Wildcards include '?' for any single character and '*' for any number of characters including zero.
#' @param keyword_elements Keywords.
#' @return Formatted keywords.
#' @keywords internal

format_keywords <- function(keyword_elements) {

    keyword_elements <- tolower(keyword_elements)
    keyword_elements <- utils::glob2rx(keyword_elements, trim.head = TRUE, trim.tail = TRUE)
    keyword_elements

}


#' Standardize Output From NLP Engine
#'
#' Converts output of NLP engine to a dataframe with entries recognized by CEDARS.
#' @param input_df Data frame produced by the NLP engine.
#' @param engine NLP engine. At this time only UDPipe is supported.
#' @return A dataframe with NLP annotations.
#' @keywords internal

standardize_nlp <- function(input_df, engine) {

    if (engine == "udpipe") {

        output_df <- data.frame(text_id = input_df$doc_id, paragraph_id = input_df$paragraph_id, sentence_id = input_df$sentence_id,
            start = input_df$start, end = input_df$end, token = input_df$token, lemma = input_df$lemma, token_id = input_df$token_id,
            upos = input_df$upos, xpos = input_df$xpos, head_token_id = input_df$head_token_id, dependency = input_df$dep_rel,
            features = input_df$feats)
    }

    output_df$upos <- as.character(output_df$upos)
    output_df$token_id <- as.character(output_df$token_id)
    output_df$token_id <- as.numeric(output_df$token_id)
    output_df$head_token_id <- as.character(output_df$head_token_id)
    output_df$head_token_id <- as.numeric(output_df$head_token_id)

    output_df

}


#' Highlight Keywords and Concepts
#'
#' Adds color and changes font to bold for selected strings of sentences and notes. The current method uses HTML.
#' @param get_output Dataframe with field 'selected', the latter containing text string corresponding to selected sentences.
#' @return Text string with HTML markup.
#' @keywords internal

colorize <- function(get_output) {

    get_output$selected <- gsub("\\*START\\*", "<span style=\"color:red;font-weight:bold\">", get_output$selected)
    get_output$selected <- gsub("\\*END\\*", "</span>", get_output$selected)

    get_output$note_text <- gsub("\\*START\\*", "<span style=\"color:red;font-weight:bold\">", get_output$note_text)
    get_output$note_text <- gsub("\\*END\\*", "</span>", get_output$note_text)

    get_output$note_text <- gsub("\\n", "<br>", get_output$note_text)

    get_output

}


#' Start CEDARS Locally
#'
#' Starts CEDARS locally from RStudio. This is a functional approach and is easier to implement than a full-fledged Shiny Server. Multiple users can access the same CEDARS project on the MongoDB server using separate local R sessions, however in that case MongoDB credentials would have to be shared to all. The best option for multi-user implementations is to use Shiny Server.
#' @param user DB user name.
#' @param password DB password.
#' @param host Host server.
#' @param port MongoDB port.
#' @param database MongoDB database name.
#' @return {
#' Launches the CEDARS Shiny app locally (on the desktop computer).
#' }
#' @examples
#' \dontrun{
#' start_local(user = 'John', password = 'db_password_1234', host = 'server1234', port = NA,
#' database = 'myDB')
#' }
#' @export

start_local <- function(user, password, host, port, database) {

    assign("g_user", user, cedars.env)
    assign("g_password", password, cedars.env)
    assign("g_host", host, cedars.env)
    assign("g_port", port, cedars.env)
    assign("g_database", database, cedars.env)
    assign("g_ldap", FALSE, cedars.env)

    shiny::runApp(appDir = paste(find.package("CEDARS", lib.loc = NULL, quiet = TRUE), "/shiny", sep = ""))

}


#' Save MongoDB Credentials
#'
#' Saves MongoDB credentials as 'db_credentials.Rdata' and Shiny app file as 'app.R'. Those two files should be copied to the Shiny Server app directory. Needed only if using Shiny Server; credentials are entered in the command line for local app use.
#' @param user MongoDB user name.
#' @param password MongoDB user password.
#' @param host MongoDB server host.
#' @param port MongoDB port.
#' @param database MongoDB database name.
#' @param destination_path Folder where the files should be saved. Default is working directory.
#' @param LDAP is LDAP being used? In this case, CEDARS will not prompt for user ID/password and a check will NOT be made on the users table. Access will be granted, relying on LDAP authentication. Annotations will be stamped with LDAP user name.
#' @return {
#' No return value, saves DB administrator credentials in local folder.
#' }
#' @examples
#' \dontrun{
#' save_credentials(user = 'John', password = 'db_password_1234', host = 'server1234',
#' database = 'myDB', LDAP = FALSE, destination_path = getwd())
#' }
#' @export

save_credentials <- function(user, password, host, port, database, LDAP, destination_path = getwd()) {

    app_path <- paste(find.package("CEDARS", lib.loc = NULL, quiet = TRUE), "/shiny/app.R", sep = "")

    g_user <- user
    g_password <- password
    g_host <- host
    g_port <- port
    g_database <- database
    g_ldap <- LDAP

    file.copy(from = app_path, to = paste(destination_path, "/app.R", sep = ""))

    save(g_user, g_password, g_host, g_port, g_database, g_ldap, file = paste(destination_path, "/db_credentials.Rdata",
        sep = ""))

}


#' Generate unique test project name (i.e. DB name) on MongoDB CEDARS testing cluster
#'
#' Parses existing DB names and randomly generates a unique test project name on MongoDB CEDARS testing cluster. This is used for convenience purposes when the R user does not have an existing MongoDB connection. The corresponding database and collections are PUBLIC so no patient information or any other privileged/confidential data should be used! This is for testing on simulated records only.
#'
#' @details {
#' No parameter; the operation is performed on a preset server with no user input.
#' }
#' @return {
#' An object of class character, the randomly generated name of a test CEDARS project.
#' }
#' @examples
#' \dontrun{
#' find_project_name()
#' }
#' @export

find_project_name <- function(){

    base_url <- "mongodb+srv://testUser:testPW@cedars.yvjp6.mongodb.net/"
    con <- mongolite::mongo(db="admin", url=base_url)
    databases <- con$run('{ "listDatabases": 1 }')$databases$name

    # Find a test DB name not presently in use

    project_name_recurse <- function(databases){

        project_name <- paste("example", round(1e13*stats::runif(1)), sep="")

        if (project_name %in% databases) project_name <- project_name_recurse(databases)

        project_name

    }

    project_name <- project_name_recurse(databases)

    project_name

}


#' Find NLP Model
#'
#' Finds NLP model to use, if not specified. If no model present, we download the default.
#' If several present, by default we use the first one by alphabetical order.
#'
#' @param selected_model_path Initial selected model path, if NA we find one.
#' @return Model path.
#' @keywords internal

find_model <- function(selected_model_path){

    if (is.na(selected_model_path)) {

        models_path <- paste(find.package("CEDARS", lib.loc = NULL, quiet = TRUE), "/inst/models", sep = "")
        models <- list.files(path = models_path)
        models <- models[order(models, decreasing = FALSE, method = "radix")]
        if (!is.na(models[1])) selected_model_path <- paste(models_path, "/", models[1], sep = "") else {

            print("No model found, dowloading default english-ewt...")
            get_model()
            models <- list.files(path = models_path)
            selected_model_path <- paste(models_path, "/", models[1], sep = "")

        }

    }

    selected_model_path

}

Any scripts or data that you put into this service are public.

CEDARS documentation built on Feb. 7, 2021, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CEDARS
Simple and Efficient Pipeline for Electronic Health Record Annotation

R/accessory_functions.R
In CEDARS: Simple and Efficient Pipeline for Electronic Health Record Annotation

Defines functions find_model find_project_name save_credentials start_local colorize standardize_nlp format_keywords sanitize_query sanitize

Documented in colorize find_model find_project_name format_keywords sanitize sanitize_query save_credentials standardize_nlp start_local

Try the CEDARS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CEDARS Simple and Efficient Pipeline for Electronic Health Record Annotation

R/accessory_functions.R In CEDARS: Simple and Efficient Pipeline for Electronic Health Record Annotation

Defines functions find_model find_project_name save_credentials start_local colorize standardize_nlp format_keywords sanitize_query sanitize

Documented in colorize find_model find_project_name format_keywords sanitize sanitize_query save_credentials standardize_nlp start_local

Try the CEDARS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CEDARS
Simple and Efficient Pipeline for Electronic Health Record Annotation

R/accessory_functions.R
In CEDARS: Simple and Efficient Pipeline for Electronic Health Record Annotation