#' Creates a set of lookups for SNOMED composition
#'
#' Creates composition lookup table for a set of SNOMED CT concepts
#' @param conceptIds SNOMED CT concept IDs for creating decompositions
#' @param CDB concept database environment, containing a table called
#' FINDINGS
#' @param SNOMED environment containing a SNOMED dictionary
#' @param output_filename filename of output file
#' @return TRUE if successful
#' @export
#' @seealso [decompose()]
#' @examples
#' # Not run
#'
#' mylookup <- createComposeLookup(D)
batchDecompose <- function(conceptIds, CDB, output_filename,
SNOMED = getSNOMED(), ...){
conceptIds <- as.SNOMEDconcept(conceptIds, SNOMED = SNOMED)
started <- FALSE
for (i in seq_along(conceptIds)){
D <- NULL
try(D <- decompose(conceptIds[i], CDB = CDB, SNOMED = SNOMED, ...))
if (is.null(D)){
message('Error in analysing concept ', conceptIds[i], '(',
description(conceptIds[i])$term[1], ')')
} else {
fwrite(D, output_filename, append = started)
started <- TRUE
}
}
return(started)
}
#' Creates a set of lookups for SNOMED composition
#'
#' Creates composition lookup table for a set of SNOMED CT concepts
#' @param decompositions filename of decompose output (read by fread) or
#' data.frame containing outputs of decompose function
#' @param CDB concept database environment, containing a table called
#' FINDINGS
#' @param ... other arguments to pass to fread
#' @return data.table
#' @export
#' @seealso [decompose()]
#' @examples
#' # Not run
#'
#' mylookup <- createComposeLookup(D)
createComposeLookup <- function(decompositions, CDB, ...){
sct_concept_colnames <- c('rootId', 'with', 'due_to',
'after', 'without', 'body_site', 'severity', 'stage',
'laterality', 'origId')
if (is.character(decompositions)){
D <- fread(decompositions,
colClasses = list(character = c(sct_concept_colnames,
'other_conceptId')), ...)
} else {
D <- copy(as.data.table(D))
}
for (i in sct_concept_colnames){
D[, .temp := as.SNOMEDconcept(bit64::as.integer64(D[[i]]),
SNOMED = SNOMED)]
D[.temp == 0, .temp := NA]
# explicit conversion using bit64::as.integer64 is to ensure
# that missing values and '' are handled correctly
D[, (i) := NULL]
setnames(D, '.temp', i)
}
# Remove rows with outstanding text
D <- D[!(other_conceptId %like% '[[:alpha:]]')]
D[, other_conceptId := gsub('^ +| +$', '', other_conceptId)]
# Separate due to findings; due to anything else is other_attr
D[!(due_to %in% CDB$FINDINGS$conceptId) & !is.na(due_to),
other_conceptId := paste(due_to, other_conceptId)]
D[!(due_to %in% CDB$FINDINGS$conceptId) & !is.na(due_to),
due_to := NA_integer64_]
# Combine 'due_to' and 'after' because very few SNOMED CT concepts
# have both with different concepts in each. If due_to and after
# are different concepts, do not use this decomposition as it is
# too complex for this algorithm
D <- D[is.na(due_to) | is.na(after) | after == due_to]
D[is.na(due_to) & !is.na(after), due_to := after]
# Add body site, severity, stage to other_conceptId
D[!is.na(body_site), other_conceptId := paste(body_site, other_conceptId)]
D[!is.na(severity), other_conceptId := paste(severity, other_conceptId)]
D[!is.na(stage), other_conceptId := paste(stage, other_conceptId)]
D[!is.na(laterality), other_conceptId := paste(laterality, other_conceptId)]
# Prepend root conceptId so that it is accessible when sorting
# attributes by frequency
D[, other_conceptId := paste(rootId, other_conceptId)]
D[, other_conceptId := strsplit(other_conceptId, ' ')]
# Create a frequency table of other_attr per rootId
FREQ <- D[, .(.temp = unlist(other_conceptId)), by = rootId]
FREQ[, attrId := as.SNOMEDconcept(.temp, SNOMED = SNOMED)]
FREQ <- FREQ[, .(freq = .N), by = .(attrId, rootId)][order(rootId, freq)]
FREQ <- FREQ[!attrId == rootId]
# Sort by ascending order of frequency
D[, attrId := lapply(other_conceptId, function(x){
if (length(x) <= 1){
as.SNOMEDconcept(bit64::integer64(0), SNOMED = SNOMED)
} else {
x <- unique(as.SNOMEDconcept(x, SNOMED = SNOMED))
FREQ[rootId == x[1]][(data.table(attrId = x[2:length(x)])),
on = 'attrId'][order(freq)]$attrId
}
})]
# Split into separate columns for fast matching
maxcol <- max(sapply(D$attrId, length))
for (i in 1:maxcol){
# extract attribute Ids via as.character to avoid incorrect
# conversion to numeric
D[, .temp := sapply(attrId, function(x){
ifelse(i > length(x), NA_character_, as.character(x[i]))
})]
D[, .temp2 := as.SNOMEDconcept(bit64::as.integer64(.temp),
SNOMED = SNOMED)]
setnames(D, '.temp2', paste0('attr_', i))
D[, .temp := NULL]
}
cols_to_keep <- c('rootId', 'with', 'due_to', 'without',
paste0('attr_', 1:maxcol), 'origId')
D <- D[, ..cols_to_keep]
setorderv(D, cols_to_keep)
setkeyv(D, cols_to_keep)
for (i in cols_to_keep) setindex(D, i)
D
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.