Nothing
#' Annotate a dataset.
#'
#' Standard annotation variables are available via the
#' parameter \code{annotType}. Custom dataset/variables of interest
#' are specified via the \code{annotDataset}/\code{annotVar} parameters.
#' @param data Data.frame with input data to annotate.
#' @param dataPath String with path to the data.
#' @param annotations Annotations (or list of those) either as a:
#' \itemize{
#' \item{string with standard annotation type, among:
#' \itemize{
#' \item{demographics: }{standard variables from the demographics data (DM or ADSL) are extracted}
#' \item{exposed_subjects: }{a logical variable: \code{EXFL} is added to \code{data},
#' identifying exposed subjects, i.e. subjects included in the exposure dataset (EX/ADEX)
#' dataset and with non empty and non missing start date ('EXSTDTC', 'STDY' or 'ASTDY')}
#' \item{functional_groups_lab: }{a character variable: 'LBFCTGRP' is added to \code{data}
#' based on standard naming of the parameter code ('PARAMCD' or 'LBTESTCD' variable)}
#' }
#' }
#' \item{list of custom annotation, with:
#' \itemize{
#' \item{(optional) annotation dataset, either:
#' \itemize{
#' \item{'dataset': }{String with name of the annotation dataset,
#' e.g. 'ex' to import data from the file: '[dataset].sas7bdat'in \code{dataPath}}
#' \item{'data': }{Data.frame with annotation dataset}
#' }
#' The input \code{data} is used if 'data' and 'dataset' are not specified.}
#' \item{'vars': }{Either:
#' \itemize{
#' \item{Character vector with variables of interest from annotation dataset.
#' If not specified, all variables of the dataset are considered.}
#' \item{String with new variable name computed from \code{varFct}}
#' }}
#' \item{'varFct': }{(optional) Either:
#' \itemize{
#' \item{function of \code{data} or string containing such function (e.g. 'function(data) ...')}
#' \item{string containing manipulations from column names of \code{data} (e.g. 'col1 + col2')}
#' }
#' used to create a new variable specified in \code{vars}.}
#' \item{'filters': }{(optional) Filters for the \strong{annotation dataset},
#' see \code{filters} parameter of \code{\link{filterData}}.\cr
#' The annotation dataset is first filtered, before being combined to the
#' input \code{data}, such as only the records retained in the annotation dataset
#' will be annotated in the output \code{data}. Other records will
#' have missing values in the annotated variables.
#' }
#' \item{'varLabel': }{(optional) label for new variable in case \code{varFct} is specified.}
#' \item{'varsBy': }{(optional) Character vector with variables used to merge input data and
#' the annotation dataset. If not specified:
#' \itemize{
#' \item{if an external dataset (\code{dataset}/\code{data}) is specified:}{
#' \code{subjectVar} is used}
#' \item{otherwise: }{annotation dataset and input data are merged by rows IDs}
#' }
#' }
#' }
#' }
#' }
#' @param subjectVar String with subject ID variable, 'USUBJID' by default.
#' @param labelVars Named character vector containing variable labels of \code{data}.
#' This will be updated with the labels of the extra annotation variables
#' (in \code{attr(output, 'labelVars')}).
#' @param labelData (optional) String with label for input \code{data},
#' that will be included in progress messages.
#' @inheritParams clinDataReview-common-args
#' @return Annotated \code{data}.
#' If \code{labelVars} is specified, the output contains an
#' extra attribute: 'labelVars'
#' containing updated \code{labelVars} (accessible via: in \code{attr(output, 'labelVars')}).
#' @example inst/examples/annotateData-example.R
#' @importFrom clinUtils loadDataADaMSDTM getLabelVar simpleCap getLabelVars
#' @importFrom tools file_path_sans_ext
#' @export
annotateData <- function(
data,
dataPath = ".",
annotations,
subjectVar = "USUBJID",
verbose = FALSE,
labelVars = NULL,
labelData = "data") {
if(!is.null(annotations)){
# if multiple annotations are specified: nested call
isNest <- ifelse(
is.list(annotations),
is.null(names(annotations)),
length(annotations) > 1
)
if(isNest){
for(par in annotations){
data <- annotateData(
data = data,
dataPath = dataPath,
annotations = par,
subjectVar = subjectVar,
verbose = verbose,
labelVars = labelVars
)
if(!is.null(labelVars))
labelVars <- attr(data, "labelVars")
}
return(data)
}
# custom 'left-join' function without ordering of rows and columns in x
leftJoinBase <- function(x, y, by, ...){
if(any(duplicated(y[, by])))
warning("Duplicated records in annotation dataset for: ",
toString(sQuote(by)), ", this might create replicated rows in the ",
"input data."
)
res <- merge(
x = x, y = y,
all.x = TRUE, all.y = FALSE, # left join
by = by,
sort = FALSE, # doesn't sort rows
...)
colsX <- colnames(x)
cols <- c(colsX, setdiff(colnames(res), colsX))
res <- res[, cols, drop = FALSE]
return(res)
}
if(is.character(annotations)){
switch(annotations,
'demographics' = {
## Requirements: data needs to have a column with "USUBJID", dataPath needs to be specified and must contain dm.sas7bdat
dm <- c("dm", "adsl")
dataDemoPath <- list.files(path = dataPath, pattern = "^(dm|adsl)\\..+$", ignore.case = TRUE, full.names = TRUE)
if(length(dataDemoPath) == 0){
warning(paste(simpleCap(labelData), "is not annotated with demographics data because no such data (ADSL or DM) is available."))
}else{
dataDemoPath <- dataDemoPath[1]
dataAnnotAll <- loadDataADaMSDTM(dataDemoPath, verbose = FALSE)
labelVarsDM <- attr(dataAnnotAll, "labelVars")
dataAnnot <- dataAnnotAll[[1]]
varsDM <- c("AGE","SEX","RACE","ETHNIC","COUNTRY","CNTRY", "SITEID","RFSTDTC","RFENDTC","RFXSTDTC","RFXENDTC")
# ADaM-like: 'A[variable]', e.g. ASEX, AAGE, ...
varsDM <- c(varsDM, paste0("A", varsDM))
varsDM <- intersect(varsDM, colnames(dataAnnot))
if(!subjectVar %in% colnames(dataAnnot))
stop(simpleCap(labelData), " is not annotated with demographics data because doesn't contain variable: ", subjectVar, ".")
dataAnnot <- unique(dataAnnot[, c(subjectVar, varsDM), drop = FALSE])
data <- leftJoinBase(x = data, y = dataAnnot, by = subjectVar)
msgDemo <- paste0(
simpleCap(labelData), " is annotated with demographics data: ",
toString(paste0(getLabelVar(var = varsDM, data = dataAnnot, labelVars = labelVarsDM), " (", sQuote(varsDM), ")")),
", extracted from the ", sQuote(file_path_sans_ext(basename(dataDemoPath))), " dataset."
)
if(verbose) message(msgDemo)
if(!is.null(labelVars)) labelVars <- c(labelVars, labelVarsDM[varsDM])
}
},
'exposed_subjects' = {
## Requirements: data needs to have a column with "USUBJID", dataPath needs to be specified and must contain ex.sas7bdat
## Requirements2: expects that non-exposed individuals have an empty ("") value for EXSTDTC ==> check that only dates and empty values are present
dataExPath <- list.files(path = dataPath, pattern = "^(ex|adex)\\..+$", ignore.case = TRUE, full.names = TRUE)
if(length(dataExPath) == 0){
warning(paste(simpleCap(labelData), "is not annotated with exposure data, because no such data (ADEX or EX) is available."))
}else{
dataExPath <- dataExPath[1]
dataAnnotAll <- loadDataADaMSDTM(dataExPath, verbose = FALSE)
labelVarsEX <- attr(dataAnnotAll, "labelVars")
dataAnnot <- dataAnnotAll[[1]]
if(!subjectVar %in% colnames(dataAnnot))
stop(simpleCap(labelData), " is not annotated with ",
"exposure data because doesn't contain the subject variable: ",
shQuote(subjectVar), ".")
startVar <- c("EXSTDTC", "STDY", "ASTDY")
startVar <- intersect(startVar, colnames(dataAnnot))
if(length(startVar) == 0)
stop(simpleCap(labelData), " is not annotated with exposure",
" data because doesn't contain a start time variable.")
idxWithST <- which(dataAnnot[, startVar] != "" & !is.na(dataAnnot[, startVar]))
dataAnnot <- dataAnnot[idxWithST, ]
data$EXFL <- data[, subjectVar] %in% dataAnnot[, subjectVar]
msgEx <- paste0(
simpleCap(labelData), " is annotated with exposed subjects, ",
"extracted based on subjects with non-missing ",
getLabelVar(var = startVar, data = dataAnnot, labelVars = labelVarsEX), " (", sQuote(startVar), ")",
" in the ", sQuote(file_path_sans_ext(basename(dataExPath))), " dataset."
)
if(verbose) message(msgEx)
if(!is.null(labelVars)) labelVars <- c(labelVars, EXFL = "Exposed subjects")
}
},
'functional_groups_lab' = {
varParam <- c("PARAMCD", "LBTESTCD")
varParam <- intersect(varParam, colnames(data))
if(length(varParam) == 0) {
warning(paste(simpleCap(labelData), "is not annotated with functional groups,",
"because no variable with laboratory parameter code is available in the data.")
)
} else {
labGroups <- list(
"Renal function" = c("UREA","CREAT","CA","URATE","GFR"),
"Electrolytes" = c("SODIUM","K","BICARB","CL"),
"Liver function" = c("ALT","AST","ALP","CPK","BILDIR","BILI","PROT","ALB","HGB"),
"Lipids" = c("CHOL","HDL","LDL","TRIG"),
"Haematology" = c("BASO", "EOS", "HCT", "HGB", "LYM", "MCH", "MCHC", "MCV", "MONO", "NEUT", "PLAT", "RBC", "WBC")
)
labCodeToGroup <- setNames(
rep(names(labGroups), times = sapply(labGroups, length)),
unlist(labGroups)
)
labGroupData <- labCodeToGroup[as.character(data[, varParam])]
labGroupData[is.na(labGroupData)] <- "Other"
labGroupsInData <- intersect(c(names(labGroups), "Other"), unique(labGroupData))
data$LBFCTGRP <- factor(labGroupData, levels = labGroupsInData)
msgAnnot <- paste0(
simpleCap(labelData), " is annotated with functional groups, ",
"based on standard naming of the ",
getLabelVar(var = varParam, data = data, labelVars = labelVars), " (",
sQuote(varParam), ").")
if(verbose) message(msgAnnot)
if(!is.null(labelVars)) labelVars <- c(labelVars, LBFCTGRP = "Functional group")
}
},
stop(
simpleCap(labelData), " is not annotated, because ",
"'annotations' should be one of: ",
toString(sQuote(c("demographics", "exposed_subjects", "functional_groups_lab"))),
" or custom: contains a 'data'/'dataset' element."
)
)
}else{
# annotation by:?
varsBy <- annotations[["varsBy"]]
annotData <- NULL
# extract annotation data
if("dataset" %in% names(annotations)){
annotDataset <- annotations[["dataset"]]
annotDataPath <- list.files(
path = dataPath,
pattern = paste0("^(", annotDataset, ")\\..+$"),
ignore.case = TRUE, full.names = TRUE
)
if(length(annotDataPath) == 0){
warning(paste(simpleCap(labelData), "is not annotated with",
"the specified", shQuote(annotDataset), "dataset, because",
"it is not available in:", dataPath
))
}else{
annotDataAll <- loadDataADaMSDTM(annotDataPath, verbose = FALSE)
annotData <- annotDataAll[[1]]
labelVarsAnnot <- attr(annotDataAll, "labelVars")
if(is.null(varsBy)) varsBy <- subjectVar
}
}else if("data" %in% names(annotations)){
annotDataset <- "custom"
annotData <- annotations$data
labelVarsAnnot <- getLabelVars(data = annotData)
if(is.null(varsBy)) varsBy <- subjectVar
}else{
annotDataset <- "current"
data$idAnnot <- seq_len(nrow(data))
annotData <- data
labelVarsAnnot <- labelVars
if(is.null(varsBy)) varsBy <- "idAnnot"
}
if(!is.null(annotData)){
# filter if required:
annotFilter <- annotations$filters
if(!is.null(annotFilter)){
annotData <- filterData(
data = annotData,
filters = annotFilter,
returnAll = FALSE,
labelVars = labelVarsAnnot
)
labelVarsAnnot <- attr(annotData, "labelVars")
msgFilter <- attr(annotData, "msg")
}
# function to apply to extract new variables
varFct <- annotations$varFct
if(!is.null(varFct)){
if(is.null(annotations$vars) || length(annotations$vars) != 1)
stop("'vars' should be specified and of length 1 for 'varFct':\n",
capture.output(varFct))
varNew <- annotations$vars
if(is.character(varFct)){
varFctToFct <- try(eval(expr = parse(text = varFct)), silent = TRUE)
if(!inherits(varFctToFct, "try-error")){
annotData[[varNew]] <- varFctToFct(annotData)
}else{
annotData[[varNew]] <- eval(
expr = parse(text = varFct),
envir = annotData
)
}
msgVarFct <- varFct
}else if(is.function(varFct)){
annotData[[varNew]] <- varFct(annotData)
msgVarFct <- getFctCode(varFct)
}else stop("'varFct' should be a character or a function.")
# set label:
labelNew <- annotations$varLabel
if(is.null(labelNew)) labelNew <- msgVarFct
labelVarsAnnot[varNew] <- labelNew
msgVarFct <- paste("based on:", msgVarFct)
# remove variable in data if already present
data[[varNew]] <- NULL
}
# if 'vars' not specified, all columns of the annotation dataset are retained:
annotVar <- annotations$vars
if(is.null(annotVar)) annotVar <- colnames(annotData)
annotVar <- setdiff(annotVar, varsBy)
isAnnotInData <- annotVar %in% colnames(data)
if(any(isAnnotInData) & is.null(varFct)){
varAnnotInDataText <- toString(paste0(
getLabelVar(
var = annotVar[isAnnotInData],
data = annotData,
labelVars = labelVarsAnnot
),
" (",
sQuote(annotVar[isAnnotInData]),
")"
))
warning(
simpleCap(labelData), " is not annotated with variable(s): ",
varAnnotInDataText,
" from the ", sQuote(annotDataset), " dataset",
" because they are already available in data."
)
annotVar <- annotVar[!isAnnotInData]
}
if(length(annotVar) > 0){
varsByNotInData <- setdiff(varsBy, colnames(annotData))
if(length(varsByNotInData) > 0)
stop(simpleCap(labelData), " is not annotated with ", sQuote(annotDataset), ", because doesn't contain variable:", toString(sQuote(varsByNotInData)), ".")
annotData <- unique(annotData[, unique(c(varsBy, annotVar)), drop = FALSE])
data <- leftJoinBase(x = data, y = annotData, by = varsBy)
msgAnnot <- paste0(
simpleCap(labelData), " annotated with variable(s): ",
toString(paste0(
getLabelVar(var = annotVar, data = annotData, labelVars = labelVarsAnnot),
" (", sQuote(annotVar), ")"
)),
" from the ", sQuote(annotDataset), " dataset",
if(!is.null(annotations$varFct)) paste0(" ", msgVarFct),
if(!is.null(annotFilter)) paste(" whose", msgFilter),
if(annotDataset != "current"){
paste0(
" based on the variable(s): ",
getLabelVar(var = varsBy, data = annotData, labelVars = labelVarsAnnot),
" (", sQuote(varsBy), ")"
)
},
"."
)
if(verbose) message(msgAnnot)
if(!is.null(labelVars)) labelVars <- c(labelVars, labelVarsAnnot[annotVar])
}
}
if(annotDataset == "current")
data[["idAnnot"]] <- NULL
}
}
if(!is.null(labelVars))
attr(data, "labelVars") <- labelVars
return(data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.