Nothing
extractEntity <- function(data, enttype,
CALIBER_ENTITY = NULL, CALIBER_LOOKUPS = NULL, ...){
# Extracts and decodes a specified entity type from a
# ffdf or data.table, automatically using CALIBER lookups
# to decode the data. Returns a data.table.
# Coded integer values are converted to factors
# YYYYMMDD dates are converted to IDate
# Medcodes, prodcodes and score method are converted to factors
# (Read terms, product names or score method) if the
# CALIBERlookups package is available
# Arguments: data -- ffdf or data.table or data.frame
# enttype -- one or more entity types to extract;
# if extracting from multiple entity types, the
# data fields must be identical -- this will be
# checked
# CALIBER_ENTITY -- entity definitions
# CALIBER_LOOKUPS -- lookups tables
# ... -- arguments to pass to YYYYMMDDtoDate
# Requires the CALIBERlookups package, or lookups to be
# in the 'data' folder of the current working directory
if (is.null(CALIBER_ENTITY)){
# Load the CALIBERlookups package, but do not 'require' it
# explicity so that R CMD check does not report an error -
# the CALIBERlookups package will not be available
# to external users.
eval(parse(text="require('CALIBERlookups', quietly=TRUE)"))
data('CALIBER_ENTITY', envir = environment())
} else {
# Check that CALIBER_ENTITY is a data.table
CALIBER_ENTITY <- as.data.table(CALIBER_ENTITY)
}
this_enttype <- enttype
# If multiple entity types, ensure that the data fields are identical
if (length(this_enttype) > 1){
for (i in 2:length(this_enttype)){
if (!identical(
unlist(CALIBER_ENTITY[enttype == this_enttype[i],
colnames(CALIBER_ENTITY)[grepl('data',
colnames(CALIBER_ENTITY))], with = FALSE]),
unlist(CALIBER_ENTITY[enttype == this_enttype[i - 1],
colnames(CALIBER_ENTITY)[grepl('data',
colnames(CALIBER_ENTITY))], with = FALSE]))){
stop('If extracting multiple entity types the data specification must be identical')
}
}
}
if (is.ffdf(data)){
use <- as.ffdf(data.frame(enttype = this_enttype, use = TRUE))
A <- as.data.table(as.data.frame(merge(data, use, by = 'enttype')))
A[, use := NULL]
} else {
A <- as.data.table(data[data$enttype %in% this_enttype, ])
}
# Generate new columns with interpreted data
# (doesn't translate medcode or prodcode)
# If multiple rows, they must be identical
template <- CALIBER_ENTITY[enttype %in% this_enttype][1]
# template is a data.table with 1 row
uselookup <- function(vector, lookupname, categorycol, labelcol,
stringsAsFactors = TRUE){
if (length(find.package('CALIBERlookups', quiet = TRUE)) > 0){
# Load the CALIBERlookups package, but do not 'require' it
# explicity so that R CMD check does not report an error -
# the CALIBERlookups package will not be available
# to external users.
eval(parse(text = "require('CALIBERlookups', quietly=TRUE)"))
eval(parse(text = paste('data("', lookupname,
'", envir = environment())', sep = '')))
temp <- copy(get(lookupname))
setkeyv(temp, categorycol)
if (any(!is.na(vector))){
# check that vector is not all missing, as this would cause
# an error in max(vector)
if (stringsAsFactors){
factor(vector, levels = 1:max(vector, na.rm = TRUE),
labels = temp[J(1:max(vector, na.rm = TRUE)),
labelcol, with = FALSE, mult='first'][[labelcol]])
} else {
temp[J(vector), labelcol, with = FALSE,
mult='first'][[labelcol]]
}
}
} else {
stop('CALIBERlookups package unavailable.')
}
}
# Remove any blank data columns
for (i in 9:(template$data_fields + 1)){
if ('data' %&% i %in% names(A)){
A[, ('data' %&% i) := NULL]
}
}
if (template$data_fields > 0){
# Generate extra columns with interpreted information
for (i in 1:template$data_fields){
if (!(('data' %&% i) %in% colnames(A))){
# This data column is not present in the data
message('Column data' %&% i %&% ' is missing.')
} else {
# Create new variable names
newname <- sub('\\.$', '',
make.names(template[['data' %&% i]]))
datatype <- as.character(template[['data' %&% i %&% '_lkup']])
# Note that for N_A and P_A, category 0 is not 'missing';
# however it will be coded to NA for consistency with the
# other lookups. The original data are retained when
# creating factors.
if (datatype == 'Medical Dictionary'){
# Convert to character instead of factor
# to remove unused terms
temp <- uselookup(A[['data' %&% i]],
'CALIBER_DICT', 'medcode', 'term',
stringsAsFactors = FALSE)
A[, newname := temp, with = FALSE]
setnames(A, 'data' %&% i, newname %&% '.medcode')
} else if (datatype == 'Product Dictionary'){
# Convert to character to remove unused products
temp <- uselookup(A[['data' %&% i]],
'CALIBER_PRODDICT', 'prodcode', 'prodname',
stringsAsFactors = FALSE)
A[, (newname) := temp]
# Convert the name of the original data column
setnames(A, 'data' %&% i, newname %&% '.prodcode')
} else if (datatype == 'Scoring'){
temp <- uselookup(A[['data' %&% i]],
'CALIBER_SCOREMETHOD', 'code', 'scoringmethod')
A[, (newname) := temp]
} else if (datatype == 'YYYYMMDD'){
# Find out type of result
newtype <- rep('missing', nrow(A))
newtype[istrue(A[['data' %&% i]] > 1800 &
A[['data' %&% i]] < 2050)] <- 'year'
newtype[istrue(A[['data' %&% i]] > 180000 &
A[['data' %&% i]] < 205000)] <- 'yearmonth'
newtype[istrue(A[['data' %&% i]] > 18000000 &
A[['data' %&% i]] < 20500000)] <- 'date'
A[, (newname) := YYYYMMDDtoDate(A[['data' %&% i]], ...)]
A[, (newname %&% '.datetype') := newtype]
} else if (datatype == 'GEN_SDC'){
newtype <- rep('missing', nrow(A))
newtype[istrue(A[['data' %&% i]] > 3)] <- 'date'
A[, newname := GEN_SDCtoDate(A[['data' %&% i]]),
with = FALSE]
A[, (newname %&% '.datetype') := newtype]
} else if (datatype == ''){
# No conversion necessary, just change the name
setnames(A, 'data' %&% i, newname)
} else {
# need to load lookup tables
if (is.null(CALIBER_LOOKUPS)){
if (length(find.package('CALIBERlookups', quiet = TRUE)) > 0){
# Load the CALIBERlookups package, but do not 'require' it
# explicity so that R CMD check does not report an error -
# the CALIBERlookups package will not be available
# to external users.
eval(parse(text = "require('CALIBERlookups', quietly=TRUE)"))
data('CALIBER_LOOKUPS', envir = environment())
} else {
stop('CALIBERlookups package unavailable.')
}
} else {
# Ensure that CALIBER_LOOKUPS is a data.table
CALIBER_LOOKUPS <- as.data.table(CALIBER_LOOKUPS)
}
if (is.null(CALIBER_LOOKUPS)){
stop('Unable to fund CALIBER_LOOKUPS')
}
# CALIBER_LOOKUPS should exist by now
if (datatype %in% CALIBER_LOOKUPS$lookup){
thislookup <- CALIBER_LOOKUPS[lookup == datatype]
# Add category number in front of description to avoid duplicates
thislookup[, description := category %&% '. ' %&% description]
setkey(thislookup, category)
maxvalue <- max(thislookup$category)
temp <- factor(A[['data' %&% i]],
levels = 1:maxvalue,
labels = thislookup[J(1:maxvalue)]$description)
A[, newname := temp, with = FALSE]
}
}
}
}
}
A
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.