Nothing
# Standard column order for codelist:
# CODELIST_COLORDER <- c('category', 'code', 'term')
# medcode last for Read terms, any other columns after
codelist <- function(x = NULL, Name = NULL, Version = NULL,
Author = NULL, Date = NULL, Categories = NULL,
Source = NULL, ...){
# Creates an object of class 'codelist' with metadata
# and checks that all terms included in the codelist are in CALIBER_DICT
# Arguments: x - a dictionary name (read, opcs, icd10), a
# filename of a CSV, Stata or tab delimited text file,
# a selection object, or a data.frame of terms.
# Check that the master dictionary is loaded
loadDICT()
if (is.null(x)){
if (length(getdictionary()) == 1){
x <- getdictionary()
} else {
x <- select.list(ALLDICTS, multiple = FALSE,
title = 'Which dictionary to extract codes from?')
}
cat('\nExtracting codelist from ' %&% x %&%
' with non-missing categories.\n')
out <- dictToCodelist(x)
} else if (is.selection(x)){
out <- selectionToCodelist(x, ...)
} else if (is.codelist(x)){
out <- x
} else if (is.data.frame(x)){
out <- tableToCodelist(x, ...)
} else if (is.character(x) & length(x) == 1){
if (x %in% ALLDICTS){
cat('\nExtracting codelist from terms in ' %&% x %&%
' with non-missing categories.\n')
out <- dictToCodelist(x)
} else {
temp <- NULL
if (grepl('.dta$', x)){
temp <- importDTA(x)
} else if (grepl('.csv$', x)){
temp <- importCSVcodelist(x, ...)
} else if (grepl('.txt$', x)){
temp <- read.delim(x, as.is=TRUE, sep='\t', ...)
}
if (is.null(temp)){
stop('Unable to load codelist from file')
} else {
out <- tableToCodelist(temp)
}
}
}
# Set metadata
if (!is.null(Name)){
setMetadata(out, Name = Name)
}
if (!is.null(Version)){
setMetadata(out, Version = Version)
}
if (!is.null(Author)){
setMetadata(out, Author = Author)
}
if (!is.null(Date)){
setMetadata(out, Date = Date)
}
if (!is.null(Categories)){
setMetadata(out, Categories = Categories)
}
if (!is.null(Source)){
setMetadata(out, Source = Source)
}
out
}
importDTA <- function(x){
# Imports Stata DTA file, automatically setting
# factor levels 0 and negative to NA
# and restoring the correct labels using the
# label.table attribute
x <- read.dta(x, convert.factors = FALSE)
# Now loop through all variables, converting to factors as
# appropriate
if (!is.null(attr(x, 'label.table'))){
for (i in names(attr(x, 'label.table'))){
newlevels <- attr(x, 'label.table')[[i]]
newlabels <- names(attr(x, 'label.table')[[i]])
if (max(newlevels) > 0){
keeplevels <- 1:max(newlevels)
# Arrange the labels in ascending numerical order
keeplabels <- sapply(keeplevels, function(z){
if (any(newlevels==z)){
newlabels[which(newlevels==z)]
} else {
z
}
})
# Find out which columns this applies to
colnums <- which(attr(x, 'val.labels')==i)
if (length(colnums) > 0){
for (col in colnums){
x[, col] <- factor(x[, col], levels=keeplevels,
labels=keeplabels)
}
}
}
}
}
x
}
dictToCodelist <- function(dictionary){
# Extracts all categories with positive numbers into a codelist.
if (length(dictionary)!=1){
stop('A codelist can contain terms from only one dictionary')
}
if (!(dictionary %in% ALLDICTS)){
stop('dictionary should be in ' %&% paste(ALLDICTS, collapse='; '))
}
metadata <- extractMetadataFromMETA(dictionary)
if (dictionary=='read'){
temp <- copy(CALIBER_DICT[dict==dictionary & category>=0,
list(category, code, term, medcode, events)])
} else {
temp <- copy(CALIBER_DICT[dict==dictionary & category>=0,
list(category, code, term)])
}
addAttributesToCodelist(temp, metadata)
# Set the first 3 columns to be category, code, term
setcolorder(temp, c(CODELIST_COLORDER,
setdiff(names(temp), CODELIST_COLORDER)))
class(temp) <- c('codelist', 'data.table', 'data.frame')
# All exported codelists are sorted by category, then code
setkey(temp, category, code)
return(temp)
}
selectionToCodelist <- function(x, category=1,
dictionary=NULL){
# Converts a selection to a codelist object
# Uses metadata from META
# Uses the current dictionary unless more than one is in use
# and the terms come from more than one dictionary, in which
# case the user is asked which dictionary to use.
x <- as.selection(x)
# Swap names
newcategory <- category
rm(category)
if (is.null(dictionary)){
dictionary <- getdictionary()
}
getDict <- unique(sub('icdhead', 'icd10',
unique(CALIBER_DICT[as.logical(x), dict])))
if (!is.null(dictionary)){
getDict <- intersect(dictionary, getDict)
}
if (length(getDict)>1){
getDict <- intersect(getDict, getdictionary())
}
if (length(getDict)>1){
stop('Terms come from more than one dictionary')
} else if (length(getDict)==0) {
warning('Empty codelist')
x <- copy(CALIBER_DICT[FALSE,
list(category=integer(0), code, term, medcode, events)])
getDict <- dictionary[1]
} else {
cat('\nUsing terms from ' %&% getDict)
x <- copy(CALIBER_DICT[as.logical(x) == TRUE & dict == getDict,
list(category = newcategory, code, term, medcode, events)])
}
if (getDict!='read'){
# medcode is irrelevant
x[, medcode:=NULL]
# number of events not available for ICD-10 or OPCS
x[, events:=NULL]
}
# Ensure that category is registered in META
temp <- retrieveCategoriesFromMETA()
if (nrow(temp)==0){
addCategory(newcategory)
} else if (!(newcategory %in% temp$category)){
addCategory(newcategory)
}
metadata <- extractMetadataFromMETA(getDict)
x <- addAttributesToCodelist(x, metadata)
class(x) <- c('codelist', 'data.table', 'data.frame')
# Expand if ICD-9 or ICD-10 codelist
if (getDict %in% c('icd9', 'icd10')){
if (!(isExpandedCodelist(x))){
x <- copy(expandCodelist(x))
}
}
# All exported codelists are sorted by category, then code
setkey(x, category, code)
setcolorder(x, c(CODELIST_COLORDER,
setdiff(names(x), CODELIST_COLORDER)))
return(x)
}
tableToCodelist <- function(x, noisy = FALSE){
# Converts a table (data.frame, data.table or list)
# to a codelist. Maps on medcode for Read codelists,
# otherwise on ICD-10 or OPCS code.
# Arguments: x - a codelist in the standard format, or a
# data.frame or data.table
if (ncol(x) == 0){
stop('No columns in table')
}
# Before converting to a data.table, extract useful
# metadata
if (!is.null(attr(x, 'datalabel'))){
datalabel <- attr(x, 'datalabel')
} else {
datalabel <- ''
}
if (!is.null(attr(x, 'time.stamp'))){
time.stamp <- attr(x, 'time.stamp')
} else {
time.stamp <- ''
}
x <- copy(data.table(x))
# Remove duplicates
x <- copy(x[!duplicated(x)])
#### STANDARDISE COLUMN NAMES ####
stdNames <- function(ideal, search1, search2, names){
# Arguments: ideal = name to convert to
# search1 = what to search for in name (e.g. gprd)
# search2 = second thing to search for (e.g. code)
# names = vector of all column names
if (ideal %in% names){
names
} else {
chosen <- grepl(search1, names) & grepl(search2, names)
# Only convert if exactly one column name matches
if (sum(chosen) == 1) {
names[chosen] <- ideal
}
names
}
}
newnames <- names(x)
newnames <- stdNames('readcode',
'^Read|^READ|^read', 'code|Code|CODE', newnames)
newnames <- stdNames('icd9_code',
'^Icd9|^ICD9|^icd9', 'code|Code|CODE', newnames)
newnames <- stdNames('icd_code',
'^Icd[^9]|^ICD[^9]|^icd[^9]', 'code|Code|CODE', newnames)
newnames <- stdNames('opcs_code',
'^Opcs|^OPCS|^opcs', 'code|Code|CODE', newnames)
newnames <- stdNames('prodcode',
'^Prod|^PROD|^prod', 'code|Code|CODE', newnames)
newnames <- stdNames('readterm',
'^Read|^READ|^read', 'term|Term|TERM', newnames)
newnames <- stdNames('icd9_term',
'^Icd9|^ICD9|^icd9', 'term|Term|TERM', newnames)
newnames <- stdNames('icd_term',
'^Icd[^9]|^ICD[^9]|^icd[^9]', 'term|Term|TERM', newnames)
newnames <- stdNames('opcs_term',
'^Opcs|^OPCS|^opcs', 'term|Term|TERM', newnames)
newnames <- stdNames('prodname',
'^Prod|^PROD|^prod', 'name|Name|NAME', newnames)
newnames <- stdNames('multilex',
'^Multi[Ll]ex|^MULTILEX', '', newnames)
newnames <- stdNames('medcode',
'^Med[Cc]ode|^MEDCODE', '', newnames)
newnames <- stdNames('readcode',
'^READ$|^[Rr]ead$', '', newnames)
newnames <- stdNames('icd9_code',
'^ICD9$|^[Ii]cd9$', '', newnames)
newnames <- stdNames('icd_code',
'^ICD$|^[Ii]cd$', '', newnames)
newnames <- stdNames('opcs_code',
'^OPCS$|^[Oo]pcs$', '', newnames)
setnames(x, names(x), newnames)
#### MEDCODE COLUMN ####
if ('medcode' %in% names(x)){
# Is it a genuine medcode column?
if (all(is.na(x$medcode))){
x[, medcode:=NULL]
} else if (is.character(x$medcode)){
x[, medcode:=NULL]
}
}
#### LOWER CASE TERM (DELETE COLUMN) ####
if ('termlc' %in% names(x)){
x[, termlc:=NULL]
}
#### CATEGORIES ####
if (!('category' %in% names(x))){
# try to guess the category name. Exclude column names which
# are commonly found in codelists.
catname <- setdiff(names(x), c('medcode', 'readcode', 'events',
'readterm', 'description', 'shortname', 'clin_events',
'termlc', 'ref_events', 'test_events', 'immun_events',
'metadata', 'icd', 'icd_code', 'icd_term', 'icd9_code',
'icd9_term', 'opcs_code', 'opcs_term',
'code', 'term', 'db_date', 'lc_readterm', 'prodcode',
'multilex', 'events', 'prodname', 'drugsubstance',
'strength', 'formulation', 'route', 'bnfcode', 'bnf',
'bnfheader', 'db', 'therapyevents', 'drugsubstancename',
'substancestrength', 'formulation', 'routeofadministration',
'databasebuild', 'clinicalevents', 'referralevents',
'testevents', 'immunisationevents'))[1]
if (is.na(catname)){
# create a category with 1
if (nrow(x) > 0){
x[, category:=1L]
}
catname <- 'category'
}
if (nrow(x) > 0){
setnames(x, catname, 'category')
}
}
# Convert the category column to numbers if not already done so
if (is.character(x$category) | is.factor(x$category)){
categories <- splitCategory(x$category)
x[, category:=categories$category]
categories <- categories[!duplicated(categories)][order(category)]
} else if ('shortname' %in% names(x) & !('description' %in% names(x))){
temp <- x[, list(category, shortname, description=shortname)][
order(category)]
categories <- temp[!duplicated(temp)]
} else if ('description' %in% names(x) & !('shortname' %in% names(x))){
temp <- x[, list(category, shortname=description, description)][
order(category)]
categories <- temp[!duplicated(temp)]
} else if ('description' %in% names(x) & 'shortname' %in% names(x)){
temp <- x[, list(category, shortname, description)][order(category)]
categories <- temp[!duplicated(temp)]
} else {
# all category labels are blank
if (nrow(x)==0){
categories <- data.table(category=integer(0),
shortname=character(0), description=character(0))
} else if (all(is.na(x$category))) {
categories <- data.table(category=integer(0),
shortname=character(0), description=character(0))
} else {
categories <- data.table(category=sort(unique(x$category)),
shortname='', description='')
}
}
# Remove duplicate categories
allcats <- unique(categories$category)
setkey(categories, category)
categories <- copy(categories[J(allcats), , mult='first'])
#### METADATA ####
if ('metadata' %in% names(x)){
metadata <- extractMetadataFromColumn(x$metadata)
x[, metadata:=NULL]
} else if (datalabel != '') {
# Stata metadata in datalabel
metadata <- extractMetadataFromDatalabel(datalabel, time.stamp)
} else {
# no metadata
metadata <- list()
}
# Add category table
if (is.null(metadata$Categories)){
metadata$Categories <- categories
}
# metadata$Source cannot be NULL
if (is.null(metadata$Source)){
metadata$Source <- ''
}
# Standardise column names
if ('prodcode' %in% colnames(x)){
# This is a product codelist
# multilex == code (column renamed 'multilex' on export)
# prodname == term (column renamed on export)
# (prodcode is prodcode)
if (!(metadata$Source %in% SOURCEDICTS[dict == 'product', Source])){
metadata$Source <- 'GPRDPROD'
}
if ('multilex' %in% colnames(x)) setnames(x, 'multilex', 'code')
if ('prodname' %in% colnames(x)) setnames(x, 'prodname', 'term')
} else if ('readcode' %in% names(x)){
if (!(metadata$Source %in% SOURCEDICTS[dict == 'read', Source])){
metadata$Source <- 'GPRD'
}
setnames(x, 'readcode', 'code')
# Obtain medcodes if there are none
if (!('medcode' %in% names(x))){
x[, dict:='read']
setkey(x, dict, code)
setDictKey()
x[, medcode:=CALIBER_DICT[x][, medcode]]
x[, dict:=NULL]
}
if ('readterm' %in% names(x)){
setnames(x, 'readterm', 'term')
}
} else if ('icd9_code' %in% names(x)){
if (!(metadata$Source %in% SOURCEDICTS[dict == 'icd9', Source])){
metadata$Source <- 'ONSICD9'
}
if ('icd9_term' %in% names(x)){
setnames(x, 'icd9_term', 'term')
}
setnames(x, 'icd9_code', 'code')
} else if ('icd_code' %in% names(x)){
if (!(metadata$Source %in% SOURCEDICTS[dict == 'icd10', Source])){
metadata$Source <- 'ONS'
}
if ('icd_term' %in% names(x)){
setnames(x, 'icd_term', 'term')
}
setnames(x, 'icd_code', 'code')
} else if ('opcs_code' %in% names(x)){
if (!(metadata$Source %in% SOURCEDICTS[dict == 'opcs', Source])){
metadata$Source <- 'OPCS'
}
if ('opcs_term' %in% names(x)){
setnames(x, 'opcs_term', 'term')
}
setnames(x, 'opcs_code', 'code')
} else if ('medcode' %in% names(x)){
if (!(metadata$Source %in% SOURCEDICTS[dict == 'read', Source])){
metadata$Source <- 'GPRD'
}
if ('readterm' %in% names(x)){
setnames(x, 'readterm', 'term')
}
}
# metadata$Source cannot be NULL
if (is.null(metadata$Source)){
metadata$Source <- ''
}
if (metadata$Source==''){
if ('dict' %in% names(x)){
# If there is a 'dict' column and all terms come from the same dict:
getDict <- unique(sub('icdhead', 'icd10',
sub('icd9head', 'icd9', unique(x$dict))))
if (length(getDict)==1){
# Default source
metadata$Source <- switch(getDict,
read='GPRD', opcs='OPCS', icd10='HES', icd9='ONSICD9')
} else {
stop('Terms come from more than one dictionary')
}
} else if (length(getdictionary())==1){
# If only one dictionary is in use:
metadata$Source <- switch(getdictionary(),
read='GPRD', opcs='OPCS', icd10='HES', icd9='ONSICD9')
} else {
stop('Unable to determine source dictionary')
}
}
#### CHECK CODES AGAINST MASTER CALIBER_DICTONARY ####
# (except for product codelists)
if (metadata$Source %in%
SOURCEDICTS[dict == 'read', Source]){
TEMP <- CALIBER_DICT[dict=='read',
list(medcode, .corrCode=code, .corrTerm=term)]
setkey(TEMP, medcode)
if (any(is.na(x$medcode))){
x <- copy(x[!is.na(medcode)])
}
setkey(x, medcode)
x[, .corrCode:=TEMP[x][, .corrCode]]
# If x has Read codes. check for discrepancies
if ('code' %in% names(x)){
for (i in 1:nrow(x)){
if (x[i, code] != x[i, .corrCode] & !is.na(x[i, code])){
if (noisy){
message('Read code for medcode ' %&%
x[i, medcode] %&% ' corrected from ' %&%
x[i, code] %&% ' to ' %&% x[i, .corrCode])
}
x[i, code:=.corrCode]
}
}
x[, .corrCode:=NULL]
} else {
# No Read terms originally supplied, so use the new ones
setnames(x, '.corrCode', 'code')
}
# Add Read terms if absent
if (!('term' %in% names(x))){
x[, term:=TEMP[x][, .corrTerm]]
}
} else if (metadata$Source %in%
SOURCEDICTS[dict %in% c('icd9', 'icd10'), Source]){
# Clean up codes - remove spaces and .
x[, code := gsub('[\\. ]', '', code)]
if (!(isExpandedCodelist(x))){
setattr(x, 'Source', metadata$Source)
class(x) <- c('codelist', 'data.table', 'data.frame')
x <- copy(expandCodelist(x))
}
} else if (metadata$Source %in%
SOURCEDICTS[dict == 'opcs', Source]){
TEMP <- CALIBER_DICT[dict == 'opcs', list(code,
.corrCode = code, .corrTerm = term)]
# Clean up codes - remove spaces and .
x[, code := gsub('[\\. ]', '', code)]
setkey(TEMP, code)
setkey(x, code)
x[, .corrCode:=TEMP[x][, .corrCode]]
for (i in 1:nrow(x)){
if (is.na(x[i, .corrCode])){
if (noisy & !is.na(x[i, code])){
if (x[i, code] != ''){
message('OPCS code ' %&%
x[i, code] %&%
' is not in the master dictionary.')
}
}
}
}
x[, .corrCode:=NULL]
# Add OPCS terms if absent
if (!('term' %in% names(x))){
x[, term:=TEMP[x][, .corrTerm]]
}
}
# Remove any missing codes, and check for duplicate categories
if (metadata$Source %in%
SOURCEDICTS[dict == 'read', Source]){
x <- copy(x[!is.na(medcode) &
!duplicated(x[, list(medcode, category)])])
# Check for any remaining duplicate codes (which must be in
# more than one category)
if (any(duplicated(x$medcode))){
# Find one example
exampleMedcode <- x[min(which(duplicated(x$medcode))), medcode]
exampleCategories <- paste(x[medcode==exampleMedcode, category],
collapse=', ')
stop('Medcode ' %&% exampleMedcode %&% ' is in categories ' %&%
exampleCategories %&%
'.\nIt is not permitted for a term to be in more than one category.\n' %&%
'Consider combining some categories and editing afterwards.')
}
} else {
x <- copy(x[!is.na(code) & !(code=='') &
!duplicated(x[, list(code, category)])])
# Check for any remaining duplicate codes (which must be in
# more than one category)
if (any(duplicated(x$code))){
# Find one example
exampleCode <- x[min(which(duplicated(x$code))), code]
exampleCategories <- paste(x[code==exampleCode, category], collapse=', ')
stop('Code ' %&% exampleCode %&% ' is in categories ' %&%
exampleCategories %&%
'.\nIt is not permitted for a term to be in more than one category.\n' %&%
'Consider combining some categories and editing afterwards.')
}
}
#### ADD ATTRIBUTES ####
x <- addAttributesToCodelist(x, metadata)
class(x) <- c('codelist', 'data.table', 'data.frame')
# All exported codelists are sorted by category, then code
setkey(x, category, code)
# Reorder columns (except for therapy codelists)
if (metadata$Source != 'GPRDPROD'){
setcolorder(x, c(CODELIST_COLORDER,
setdiff(names(x), CODELIST_COLORDER)))
}
return(x)
}
as.codelist <- function(x=NULL, ...){
# Checks if an object is of class 'codelist'
# Returns an object of class 'codelist' with metadata
# Arguments: x - a codelist or anything which can be coerced to
# a codelist
if (is.codelist(x)){
return(x)
} else {
return(codelist(x, ...))
}
}
addAttributesToCodelist <- function(x, metadata){
# Returns the codelist with attributes added from a list
# Arguments: x - codelist for which attributes are to be added
# metadata - list of metadata
for (theAttribute in c('Name', 'Version', 'Source', 'Author',
'Date', 'Timestamp', 'Categories', 'Expanded')){
if (theAttribute %in% names(metadata)){
setattr(x, theAttribute, metadata[[theAttribute]])
} else {
setattr(x, theAttribute, NULL)
}
}
return(x)
}
print.codelist <- function(x, ...){
printCodelist(x, ...)
}
printCodelist <- function(x, ...){
# S3 method for printing codelist objects
sourceDict <- getSourceDict(x)
if (is.null(sourceDict)){
stop('Source attribute is NULL')
}
cat('Codelist based on', sourceDict, 'dictionary with',
tolower(textTotalTerms(nrow(x))) %&% '.\n\n')
cat(paste(encodeMetadata(x, includeDescription = TRUE),
collapse = '\n'))
if (all(c('category', 'code') %in% colnames(x))){
cat('\n\nTERMS (sorted by category and code):\n')
printTerms(x[order(category, code)])
} else {
printTerms(x)
}
invisible(x)
}
is.codelist <- function(x){
# Checks the class attribute to decide whether x is a codelist
if ('codelist' %in% class(x)){
return(TRUE)
} else{
return(FALSE)
}
}
truncateChar <- function(x, maxchar){
# Truncates a character vector so that each element does not have more
# than a specified number of characters, adding ... to the end of
# truncated terms
# Arguments: x - character string to truncate
# maxchar - length to truncate to
convert <- nchar(x) > maxchar
x[convert] <- substr(x[convert], 1, maxchar-3) %&% '...'
x
}
extractMetadataFromCodelist <- function(codelist){
# Extract metadata from attributes of a codelist, returning it in a
# list. The Expanded attribute is also retrieved even though it is
# not printed or exported; this is for internal use and to ensure
# that it is carried through.
# Argument: codelist with attributes
metadata <- list()
for (theAttribute in c('Name', 'Version', 'Source', 'Author',
'Date', 'Timestamp', 'Categories', 'Expanded')){
metadata[[theAttribute]] <- attr(codelist, theAttribute)
}
return(metadata)
}
extractMetadataFromDatalabel <- function(datalabel, time.stamp){
# Extracts metadata (except category table) from a Stata datalabel
# Data elements are expected to be in the order:
# Name, Version, Date, Author
metadata <- list()
splitLabel <- sub('\\|', '', strsplit(datalabel, '\\|')[[1]])
# Trim spaces
splitLabel <- sub('^[ ]*', '', splitLabel)
splitLabel <- sub('[ ]*$', '', splitLabel)
# Version number is appended to the end of the variable name
metadata$Name <- splitLabel[1]
metadata$Version <- splitLabel[2]
metadata$Date <- splitLabel[3]
metadata$Author <- splitLabel[4]
# Source is the last part of the name (e.g. angina_gprd)
tmp <- strsplit(metadata$Name, '_')[[1]]
# use the last element of tmp (i.e. after the last _)
metadata$Source <- toupper(tmp[length(tmp)])
return(metadata)
}
extractMetadataFromColumn <- function(metadata){
# Extracts metadata from a character vector (e.g. a metadata column
# in a codelist CSV file), returning it in a list
# Argument: character vector containing metadata
metadata <- c(as.character(metadata), '')
searchfor <- function(thing){
use <- grep('^' %&% thing %&% ': ', metadata)
if (length(use)>0){
out <- sub('^' %&% thing %&% ': ', '',
metadata[use[1]])
# Remove trailing spaces
out <- sub('[ ]+$', '', out)
return(out)
} else {
return('')
}
}
out <- lapply(list('Name', 'Version', 'Source', 'Author',
'Date', 'Timestamp'), searchfor)
names(out) <- c('Name', 'Version', 'Source', 'Author',
'Date', 'Timestamp')
startfrom <- grep('Categories:', metadata)[1] + 1
if (length(startfrom) > 0){
temp <- splitCategory(metadata[
intersect(startfrom:length(metadata),
which(gsub("[' ]", '', metadata) != ''))])
# description is not provided in the 'column' metadata format
# so set it to be the same as shortname
temp$description <- temp$shortname
}
# Removing missing categories
temp <- temp[!is.na(temp$category),]
# Sort by category
temp2 <- data.table(temp)
setkey(temp2, category)
out$Categories <- temp2
return(out)
}
extractMetadataFromMETA <- function(dictName){
# Returns a list of codelist metadata from the META data.table
# Argument: dictionary name ('read', 'icd10' or 'opcs')
out <- list()
out$Name <- META[item == 'Name'][, value]
out$Version <- META[item == 'Version'][, value]
out$Source <- META[item == dictName][, value]
# If no source specified, use default
if (out$Source == 'TRUE'){
out$Source <- SOURCEDICTS[dict == dictName, Source][1]
}
out$Author <- META[item == 'Author'][,value]
out$Date <- META[item == 'Date'][,value]
out$Timestamp <- format(Sys.time(), '%H.%m %d-%b-%y')
out$Categories <- retrieveCategories()
# Name must have the source at the end e.g. _gprd
if (!grepl(tolower(out$Source) %&% '$', out$Name)){
out$Name <- out$Name %&% '_' %&% tolower(out$Source)
}
return(out)
}
encodeMetadata <- function(x, includeDescription=FALSE){
# Returns a character string with formatted metadata
# Creates a timestamp if there is none, and updates the codelist
# Argument: x - a codelist with attributes
# includeDescription - whether to include the long
# form of the description (no for export,
# yes when displaying a codelist on screen)
if (is.null(attr(x, 'Categories'))){
catTable <- NULL
} else if (!is.data.frame(attr(x, 'Categories'))) {
catTable <- NULL
} else {
categories <- attr(x, 'Categories')
if ('category' %in% names(categories)){
categories <- categories[!is.na(categories$category),]
categories <- categories[categories$category>0,]
usefulDescription <- ifelse((categories$description != '') &
(categories$description != categories$shortname),
categories$description, '')
if (includeDescription==FALSE){
usefulDescription <- ''
}
catTable <- format(categories$category[
categories$category>0]) %&% '. ' %&%
ifelse(is.na(categories$shortname), '', categories$shortname) %&%
ifelse(usefulDescription != '',
' (' %&% usefulDescription %&% ')', '')
} else {
catTable <- NULL
}
}
# Set a timestamp if there is none
if (is.null(attr(x, 'Timestamp'))){
setattr(x, 'Timestamp', format(Sys.time(), '%H.%m %d-%b-%y'))
} else if (attr(x, 'Timestamp')=='') {
setattr(x, 'Timestamp', format(Sys.time(), '%H.%m %d-%b-%y'))
}
return(c('Name: ' %&% attr(x, 'Name'),
'Version: ' %&% as.character(as.numeric(attr(x, 'Version'))),
'Source: ' %&% attr(x, 'Source'),
'Author: ' %&% attr(x, 'Author'),
'Date: ' %&% sanitizeDate(attr(x, 'Date')),
'Timestamp: ' %&% attr(x, 'Timestamp'),
'Categories:', catTable))
}
splitCategory <- function(categorystrings){
# Generates a data.frame of category number and shortname (brief
# description) from a character vector of numbered categories
# Argument: a character vector of strings in the format 1. XXX
# Convert any non-ASCII characters when converting to
# character (use iconv, sub=byte converts classes to byte.)
if (all(grepl('^[ ]*[[:digit:]]+\\.',
iconv(as.character(categorystrings), sub='byte')))){
# Use the numbers in the factor labels
categorystrings <- iconv(as.character(categorystrings), sub='byte')
# Match pattern:
# Optional space, digit, ., optional space, non-space, optional space
category <- as.integer(sub(
'^([ [:digit:]]+)\\.([[:print:]]*)$', '\\1',
categorystrings))
shortname <- sub('[ ]*$', '', sub(
'^([ [:digit:]]+)\\.[ ]*([^ ][[:print:]]*)$', '\\2',
categorystrings))
return(as.data.table(list(
category=category,
shortname=shortname,
description=shortname)))
} else {
if (is.factor(categorystrings)){
# Use the underlying numbers
category <- unclass(categorystrings)
return(as.data.table(list(category=category,
shortname=iconv(as.character(categorystrings), sub='byte'),
description=iconv(as.character(categorystrings), sub='byte'))))
} else {
# Make up category numbers if none available
return(as.data.table(list(
category=as.integer(as.factor(categorystrings)),
shortname=categorystrings,
description=categorystrings)))
}
}
}
subset.codelist <- function(x, subset, select, ...){
# S3 method for subsetting a codelist
# Arguments: x - codelist
# subset - a logical expression for rows to keep,
# default is to keep all rows
# select - a character vector of columns to keep,
# default is to keep all columns
metadata <- extractMetadataFromCodelist(x)
# Ensure that as a minimum, code, medcode if GPRD,
# term and category are kept. It is only valid as a codelist
# if these columns are kept
asCodelist <- FALSE
if (missing(select)){
# keep all columns
select <- colnames(x)
asCodelist <- TRUE
} else {
if (all(c('code', 'term', 'category') %in% select)){
asCodelist <- TRUE
}
if (SOURCEDICTS$dict[metadata$Source ==
SOURCEDICTS$Source] == 'read'){
if (!('medcode' %in% select)){
asCodelist <- FALSE
}
} else if (SOURCEDICTS$dict[metadata$Source ==
SOURCEDICTS$Source] == 'product'){
if (!('prodcode' %in% select)){
asCodelist <- FALSE
}
}
}
if (missing(subset)) {
includeRow <- TRUE
} else {
expr <- substitute(subset)
includeRow <- eval(expr, x, parent.frame())
if (!is.logical(includeRow))
stop("'subset' must evaluate to a Boolean vector")
includeRow[is.na(includeRow)] <- FALSE
}
out <- x[includeRow, select, with = FALSE]
if (is.data.table(out)){
out <- addAttributesToCodelist(out, metadata)
if (asCodelist == TRUE){
# Restore codelist class
setattr(out, 'class', c('codelist', 'data.table',
'data.frame'))
}
}
return(copy(out))
}
getSourceDict <- function(codelist){
# Returns the source dictionary of a codelist
# Uses the SOURCEDICTS data.table
SOURCEDICTS[Source == attr(codelist, 'Source'), dict]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.