R/cdisc_metadata.R
In clinCompare: Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

Documented in extract_cdisc_version get_adam_metadata get_sdtm_metadata

# Package-level cache to avoid re-reading CSV on every call
.metadata_cache <- new.env(parent = emptyenv())

#' Load and Cache CSV Metadata
#'
#' @param filename CSV filename inside inst/extdata/
#' @param key_col Column used to split into a named list (domain or dataset)
#' @param cache_key Name used for the internal cache entry
#' @return Named list of data frames, one per domain/dataset.
#' @noRd
.load_metadata_csv <- function(filename, key_col, cache_key) {
  if (exists(cache_key, envir = .metadata_cache)) {
    return(get(cache_key, envir = .metadata_cache))
  }

  csv_path <- system.file("extdata", filename, package = "clinCompare")
  if (!nzchar(csv_path) || !file.exists(csv_path)) {
    stop("Metadata file not found: ", filename,
         ". Is clinCompare installed correctly?", call. = FALSE)
  }

  raw <- utils::read.csv(csv_path, stringsAsFactors = FALSE)
  result <- split(raw[, setdiff(names(raw), key_col)], raw[[key_col]])
  # Drop the split factor so each element is a plain data.frame

  result <- lapply(result, function(df) {
    rownames(df) <- NULL
    df
  })

  assign(cache_key, result, envir = .metadata_cache)
  return(result)
}


#' SDTM Metadata
#'
#' Returns metadata for SDTM domains following CDISC standards.
#' Provides information about required, expected, and permissible variables
#' for each SDTM domain.
#'
#' Variable definitions are based on the published CDISC SDTM Implementation
#' Guide. The canonical machine-readable source is the CDISC Library API
#' (\url{https://www.cdisc.org/cdisc-library}), which requires CDISC
#' membership. The metadata shipped with clinCompare is hand-curated from the
#' published IG specifications and should be cross-referenced with the
#' official CDISC Library for regulatory submissions.
#'
#' @param version Character string specifying the SDTM IG version.
#'   Supported values: "3.4" (default, based on SDTM v2.0),
#'   "3.3" (based on SDTM v1.7). Version "3.3" excludes 7 domains
#'   introduced in v3.4 (GF, CP, BE, BS, SM, TD, TM). Within a
#'   domain, the variable lists are the same across versions -- this
#'   parameter only controls which domains are available, not
#'   per-variable version differences.
#'
#' @return A named list where keys are SDTM domain codes and values are
#'   data.frames with columns:
#'   \describe{
#'     \item{variable}{Variable name (character)}
#'     \item{label}{Variable label/description (character)}
#'     \item{type}{Data type: "Char" for character or "Num" for numeric}
#'     \item{core}{Importance level: "Req" (Required), "Exp" (Expected),
#'       or "Perm" (Permissible)}
#'   }
#'
#' @keywords internal
get_sdtm_metadata <- function(version = "3.4") {
  version <- match.arg(version, choices = c("3.4", "3.3"))

  all_domains <- .load_metadata_csv("sdtm_metadata.csv", "domain", "sdtm_all")

  # Drop the min_ig_version helper column from each data.frame
  all_domains <- lapply(all_domains, function(df) {
    df[, c("variable", "label", "type", "core"), drop = FALSE]
  })

  if (version == "3.3") {
    v34_only <- c("GF", "CP", "BE", "BS", "SM", "TD", "TM")
    all_domains[v34_only] <- NULL
  }

  return(all_domains)
}


#' ADaM Metadata
#'
#' Returns metadata for ADaM datasets following CDISC standards.
#' Provides information about required, conditional, and other variables
#' for each ADaM analysis dataset.
#'
#' Variable definitions are based on the published CDISC ADaM Implementation
#' Guide. The canonical machine-readable source is the CDISC Library API
#' (\url{https://www.cdisc.org/cdisc-library}), which requires CDISC
#' membership. The metadata shipped with clinCompare is hand-curated from the
#' published IG specifications.
#'
#' @param version Character string specifying the ADaM IG version.
#'   Supported values: "1.3" (default), "1.2", "1.1".
#'
#'   \strong{Note:} All versions currently return identical variable
#'   definitions. The ADaM IG revisions (1.1 -> 1.3) changed guidance
#'   and rules but not the core variable inventory. The parameter exists
#'   for provenance tracking only -- it does not enable version-specific
#'   validation.
#'
#' @return A named list where keys are ADaM dataset names and values are
#'   data.frames with columns:
#'   \describe{
#'     \item{variable}{Variable name (character)}
#'     \item{label}{Variable label/description (character)}
#'     \item{type}{Data type: "Char" for character or "Num" for numeric}
#'     \item{core}{Importance level: "Req" (Required), "Cond" (Conditional)}
#'   }
#'
#' @keywords internal
get_adam_metadata <- function(version = "1.3") {
  version <- match.arg(version, choices = c("1.3", "1.2", "1.1"))

  .load_metadata_csv("adam_metadata.csv", "dataset", "adam_all")
}


#' Extract CDISC Version from TS Domain
#'
#' @description
#' Reads a Trial Summary (TS) dataset and extracts the CDISC standard version
#' information. Looks for SDTM IG version (TSPARMCD = "SDTIGVER" or "CDISCVER")
#' and ADaM IG version (TSPARMCD = "ADAMIGVR") parameters.
#'
#' @param ts_data A data frame representing a TS (Trial Summary) domain.
#'   Must contain at minimum TSPARMCD and TSVAL columns.
#'
#' @return A list containing:
#' \item{sdtm_ig_version}{Character: SDTM IG version (e.g., "3.4"), or NA}
#' \item{adam_ig_version}{Character: ADaM IG version (e.g., "1.3"), or NA}
#' \item{study_id}{Character: STUDYID from TS if available, or NA}
#' \item{protocol_title}{Character: Protocol title if available, or NA}
#' \item{version_note}{Character: Formatted note string for reports}
#'
#' @keywords internal
extract_cdisc_version <- function(ts_data) {
  if (!is.data.frame(ts_data)) {
    stop("ts_data must be a data frame", call. = FALSE)
  }

  # Normalize column names to uppercase for case-insensitive matching
  col_upper <- toupper(colnames(ts_data))

  # Check for required columns
  parmcd_col <- which(col_upper == "TSPARMCD")
  val_col <- which(col_upper == "TSVAL")

  if (length(parmcd_col) == 0 || length(val_col) == 0) {
    warning("TS data must contain TSPARMCD and TSVAL columns", call. = FALSE)
    return(list(
      sdtm_ig_version = NA_character_,
      adam_ig_version = NA_character_,
      study_id = NA_character_,
      protocol_title = NA_character_,
      version_note = ""
    ))
  }

  parmcd_col <- parmcd_col[1]
  val_col <- val_col[1]

  parmcds <- toupper(as.character(ts_data[[parmcd_col]]))
  vals <- as.character(ts_data[[val_col]])

  # Helper to look up a parameter value
  get_ts_val <- function(codes) {
    for (code in codes) {
      idx <- which(parmcds == code)
      if (length(idx) > 0) {
        return(vals[idx[1]])
      }
    }
    return(NA_character_)
  }

  sdtm_ver <- get_ts_val(c("SDTIGVER", "CDISCVER"))
  adam_ver <- get_ts_val(c("ADAMIGVR"))
  study_id <- get_ts_val(c("STUDYID"))
  protocol_title <- get_ts_val(c("TITLE"))

  # Also try STUDYID column directly if not found as a parameter
  studyid_col <- which(col_upper == "STUDYID")
  if (is.na(study_id) && length(studyid_col) > 0 && nrow(ts_data) > 0) {
    study_id <- as.character(ts_data[[studyid_col[1]]][1])
  }

  # Build version note
  parts <- character(0)
  if (!is.na(sdtm_ver)) {
    parts <- c(parts, paste0("SDTM IG ", sdtm_ver))
  }
  if (!is.na(adam_ver)) {
    parts <- c(parts, paste0("ADaM IG ", adam_ver))
  }

  if (length(parts) > 0) {
    version_note <- paste0("CDISC Version (from TS): ", paste(parts, collapse = ", "))
  } else {
    version_note <- ""
  }

  return(list(
    sdtm_ig_version = sdtm_ver,
    adam_ig_version = adam_ver,
    study_id = study_id,
    protocol_title = protocol_title,
    version_note = version_note
  ))
}

Any scripts or data that you put into this service are public.

clinCompare documentation built on Feb. 19, 2026, 1:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

clinCompare
Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

R/cdisc_metadata.R
In clinCompare: Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

Defines functions extract_cdisc_version get_adam_metadata get_sdtm_metadata .load_metadata_csv

Documented in extract_cdisc_version get_adam_metadata get_sdtm_metadata

Try the clinCompare package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clinCompare Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

R/cdisc_metadata.R In clinCompare: Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

Defines functions extract_cdisc_version get_adam_metadata get_sdtm_metadata .load_metadata_csv

Documented in extract_cdisc_version get_adam_metadata get_sdtm_metadata

Try the clinCompare package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

clinCompare
Dataset Comparison with 'CDISC' Validation for Clinical Trial Data

R/cdisc_metadata.R
In clinCompare: Dataset Comparison with 'CDISC' Validation for Clinical Trial Data