#' Section Segmentation of Radiology Text Reports
#'
#' This function segments the imagereporttext column of a data frame into columns containing text
#' history, exam, comparison, technique, finding, impression
#' @param df Input data frame
#' @param site 1= , 2= , 3=, 4=; defaults to 2
#' @param hst_rgx Defaults to regex for Kaiser
#' @param exam_rgx Defaults to regex for Kaiser
#' @param comparison_rgx Defaults to regex for Kaiser
#' @param technique_rgx Defaults to regex for Kaiser
#' @param body_rgx Defaults to regex for Kaiser
#' @param imp_rgx Defaults to regex for Kaiser
#' @param datetime_rgx
#' @param liretext_rgx Defaults to regular expressions for LIRE macro; don't change unless there's a good reason to.
#' @keywords section segmentation
#' @import dplyr
#' @import tidyr
#' @export
#' @return The input data frame with additional columns exam, comparison, technique, body, impression.
#' The body column is always non-empty: the entire imagereporttext is returned in body if separation is not possible.
#' @examples
#' SectionSegmentation(input.df, site = 2)
SectionSegmentation <- function(df,
site = NULL,
hst_rgx = NULL,
exam_rgx = NULL,
comparison_rgx = NULL,
technique_rgx = NULL,
body_rgx = NULL,
imp_rgx = NULL,
datetime_rgx = NULL,
liretext_rgx = "(Among\\s*people\\s*(over|under|between)\\s*the\\s*age\\s*of.*$)|(Some\\s*findings\\s*are\\s*so\\s*common.*$)|(The\\s*following\\s*findings\\s*are\\s*so\\s*common\\s*in\\s*normal.*$)"){
if(is.null(site) & (is.null(body_rgx) | is.null(imp_rgx)))
print("No site or regular expression supplied; defaulting to parameters for site = 2 (Kaiser Permanente).")
# Create regular expressions of heading by site
if(site == 1){
hst_rgx <- "(\\[\\s*HST\\s*\\]\\s*:)|(HISTORY\\s*:)|(History\\s*:)"
exam_rgx <- "(EXAMINATION\\s*:)|(EXAM\\s*:)|(Exam\\s*:)|(Examination\\s*:)"
comparison_rgx <- "(COMPARISON\\s*:)|(Comparison\\s*:)|(COMPARISONS\\s*:)"
technique_rgx <- "(TECHNIQUE\\s*:)|(Technique\\s*:)"
body_rgx <- "(FINDINGS\\s*:)|(Findings\\s*:)|(FINDING\\s*:)|(Finding\\s*:)"
imp_rgx <- "(IMPRESSION\\s*:)|(Impression\\s*:)|(IMPRESSIONS\\s*:)|(IMPRESSION\\s\\.\\s)"
datetime_rgx <- "Date\\s*:\\s*(\\d){2}\\/(\\d){2}\\/(\\d){4}\\s*Time\\s*:(\\d){1,2}:(\\d){2}(PM)?$"
}
if(site == 2 | is.null(site)){
hst_rgx <- "\\*\\*\\s*HISTORY\\s*\\*\\*\\s*:"
exam_rgx <- "(EXAMINATION\\s*:)|(EXAM\\s*:)|(Exam\\s*:)|(Examination\\s*:)"
comparison_rgx <- "(COMPARISON\\s*:)|(Comparison\\s*:)|(COMPARISONS\\s*:)"
technique_rgx <- "(TECHNIQUE\\s*:)|(Technique\\s*:)"
body_rgx <- "\\*\\*\\s*FINDINGS\\s*\\*\\*\\s*:"
imp_rgx <- "\\*\\*\\s*IMPRESSION\\s*\\*\\*\\s*:"
datetime_rgx <- "Date\\s*:\\s*(\\d){2}\\/(\\d){2}\\/(\\d){4}\\s*Time\\s*:(\\d){1,2}:(\\d){2}(PM)?$"
}
if(site == 3){
hst_rgx <- "(Clinical\\s*History\\s*:)(\\[\\s*HST\\s*\\]\\s*:)|(HISTORY\\s*:)|(History\\s*:)"
exam_rgx <- "(EXAMINATION\\s*:)|(EXAM\\s*:)|(Exam\\s*:)|(Examination\\s*:)"
comparison_rgx <- "(COMPARISON\\s*:)|(Comparison\\s*:)|(COMPARISONS\\s*:)"
technique_rgx <- "(TECHNIQUE\\s*:)|(Technique\\s*:)"
body_rgx <- "(Findings/impression\\s*:)|(FINDINGS\\s*:)|(Findings\\s*:)|((?<!\\d\\.\\s)Finding\\s)|((?<!\\d\\.\\s)Findings\\s)|(Lumbosacral\\s*spine\\s*findings\\s*:)"
imp_rgx <- "(IMPRESSIO\\s*:)|(\\s{1}impression\\s*:)|(IMPRESSION\\s*:)|(IMPRESSIONS\\s*:)|(Impression\\s*:)|(Impression\\s)|(Impressions\\s*:)|(Combined\\s*impressions\\s*:)|(Combined\\s*impression\\s*:)|(Conclusions\\s*:)|(Conclusion\\s*:)|(Pression\\s*:)|(Pressions\\s*:)"
datetime_rgx <- "Date\\s*:\\s*(\\d){2}\\/(\\d){2}\\/(\\d){4}\\s*Time\\s*:(\\d){1,2}:(\\d){2}(PM)?$"
}
if(site == 4){
hst_rgx <- "(Clinical\\s*History\\s*:)(\\[\\s*HST\\s*\\]\\s*:)|(HISTORY\\s*:)|(History\\s*:)"
exam_rgx <- "(EXAMINATION\\s*:)|(EXAM\\s*:)|(Exam\\s*:)|(Examination\\s*:)"
comparison_rgx <- "(COMPARISON\\s*:)|(Comparison\\s*:)|(COMPARISONS\\s*:)|(Indication\\s*:)|(Indications\\s*:)|(INDICATION\\s:)"
technique_rgx <- "(TECHNIQUE\\s*:)|(Technique\\s*:)"
body_rgx <- "(Findings\\s*/Impression\\s*:)|(FINDINGS\\s*/impression\\s*:)|(Findings\\s*/\\s*impression\\s*:)|(\\sFindings\\s*:)|(FINDINGS\\s*:)|(RESULT\\s*:)|(RESULTS\\s*:)|(Results\\s*:)|(Result\\s:)|(THORACIC\\s*SPINE\\s*:)"
imp_rgx <- "(Impression\\s*:)|(\\sImpression\\s*:)|(IMPRESSION:)"
datetime_rgx <- "Date\\s*:\\s*(\\d){2}\\/(\\d){2}\\/(\\d){4}\\s*Time\\s*:(\\d){1,2}:(\\d){2}(PM)?$"
}
df.site <- subset(df, siteID == site) %>%
#### Remove LIRE intervention text
mutate(imagereporttext = gsub(liretext_rgx, "", imagereporttext, perl = TRUE)) %>%
### Insert <TAGS>
mutate(tagged = gsub(hst_rgx, "<TAG_BEFORE_HISTORY>", imagereporttext, perl = TRUE),
tagged = gsub(exam_rgx, "<TAG_BEFORE_EXAM>", tagged, perl = TRUE),
tagged = gsub(comparison_rgx, "<TAG_BEFORE_COMPARISON>", tagged, perl = TRUE),
tagged = gsub(technique_rgx, "<TAG_BEFORE_TECHNIQUE>", tagged, perl = TRUE),
tagged = gsub(body_rgx, "<TAG_BEFORE_BODY>", tagged, perl = TRUE),
tagged = gsub(imp_rgx, "<TAG_BEFORE_IMPRESSION>", tagged, perl = TRUE),
tagged = gsub(datetime_rgx, "<TAG_DATETIME>", tagged, perl = TRUE)) %>%
### Split by <TAGS>
separate(., tagged, into = c("impression", "datetime"), sep = "<TAG_DATETIME>") %>%
separate(., impression, into = c("body", "impression"), sep = "<TAG_BEFORE_IMPRESSION>") %>%
separate(., body, into = c("technique", "body"), sep = "<TAG_BEFORE_BODY>") %>%
separate(., technique, into = c("comparison", "technique"), sep = "<TAG_BEFORE_TECHNIQUE>") %>%
separate(., comparison, into = c("exam", "comparison"), sep = "<TAG_BEFORE_COMPARISON>") %>%
separate(., exam, into = c("history", "exam"), sep = "<TAG_BEFORE_EXAM>") %>%
mutate(history = gsub("^<TAG_BEFORE_HISTORY>", "", history, perl = TRUE),
history = gsub("^(\\s*\\*\\*NAME\\s*\\s*)$", "", history, perl = TRUE))
return(df.site)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.