R/evidence_schema.R
In r4subcore: Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

Documented in as_evidence bind_evidence canon_result canon_severity evidence_schema evidence_summary validate_evidence

#' Evidence Table Schema Definition
#'
#' Returns the column specification for the R4SUB evidence table. Each element
#' describes a column's expected R type and, where applicable, the set of
#' allowed values.
#'
#' @return A named list. Each element is a list with `type` (character) and
#'   optionally `allowed` (character vector) or `nullable` (logical).
#'
#' @examples
#' str(evidence_schema())
#'
#' @export
evidence_schema <- function() {
  list(
    run_id           = list(type = "character", nullable = FALSE),
    study_id         = list(type = "character", nullable = FALSE),
    asset_type       = list(type = "character", nullable = FALSE,
                            allowed = c("dataset", "define", "program",
                                        "validation", "spec", "other")),
    asset_id         = list(type = "character", nullable = FALSE),
    source_name      = list(type = "character", nullable = FALSE),
    source_version   = list(type = "character", nullable = TRUE),
    indicator_id     = list(type = "character", nullable = FALSE),
    indicator_name   = list(type = "character", nullable = FALSE),
    indicator_domain = list(type = "character", nullable = FALSE,
                            allowed = c("quality", "trace", "risk", "usability")),
    severity         = list(type = "character", nullable = FALSE,
                            allowed = c("info", "low", "medium", "high", "critical")),
    result           = list(type = "character", nullable = FALSE,
                            allowed = c("pass", "fail", "warn", "na")),
    metric_value     = list(type = "double",    nullable = TRUE),
    metric_unit      = list(type = "character", nullable = TRUE),
    message          = list(type = "character", nullable = TRUE),
    location         = list(type = "character", nullable = TRUE),
    evidence_payload = list(type = "character", nullable = TRUE),
    created_at       = list(type = "POSIXct",   nullable = FALSE)
  )
}

#' Canonical Severity Values
#'
#' Maps common severity labels (case-insensitive) to the canonical set.
#'
#' @param x Character vector of severity values.
#' @return Character vector with canonical severity labels.
#'
#' @examples
#' canon_severity(c("HIGH", "Low", "warning", "Error"))
#'
#' @importFrom cli cli_abort
#' @export
canon_severity <- function(x) {
  map <- c(
    "info"     = "info",     "information" = "info",   "note" = "info",
    "low"      = "low",      "minor"       = "low",
    "medium"   = "medium",   "moderate"    = "medium", "warning" = "medium",
    "warn"     = "medium",
    "high"     = "high",     "major"       = "high",   "error" = "high",
    "critical" = "critical", "severe"      = "critical"
  )
  lower <- tolower(trimws(x))
  out <- unname(map[lower])
  bad <- is.na(out) & !is.na(x)
  if (any(bad)) {
    bad_vals <- paste(unique(x[bad]), collapse = ", ")
    allowed_vals <- paste(unique(map), collapse = ", ")
    cli::cli_abort(
      "Unknown severity value(s): {bad_vals}. Expected one of: {allowed_vals} (or common aliases)."
    )
  }
  out
}

#' Canonical Result Values
#'
#' Maps common result/status labels to the canonical set:
#' `pass`, `fail`, `warn`, `na`.
#'
#' @param x Character vector of result values.
#' @return Character vector with canonical result labels.
#'
#' @examples
#' canon_result(c("PASS", "Failed", "Warning", "N/A"))
#'
#' @export
canon_result <- function(x) {
  map <- c(
    "pass"    = "pass",   "passed" = "pass",  "ok" = "pass",  "success" = "pass",
    "fail"    = "fail",   "failed" = "fail",  "error" = "fail",
    "warn"    = "warn",   "warning" = "warn",
    "na"      = "na",     "n/a" = "na",       "not applicable" = "na",
    "missing" = "na"
  )

  lower <- tolower(trimws(x))
  out <- unname(map[lower])
  bad <- is.na(out) & !is.na(x)
  if (any(bad)) {
    bad_vals <- paste(unique(x[bad]), collapse = ", ")
    allowed_vals <- paste(unique(map), collapse = ", ")
    cli::cli_abort(
      "Unknown result value(s): {bad_vals}. Expected one of: {allowed_vals} (or common aliases)."
    )
  }
  out
}

#' Coerce to Evidence Table
#'
#' Takes a data.frame and coerces it into a valid evidence table. Fills in
#' missing nullable columns with `NA` of the correct type and validates
#' controlled vocabulary columns.
#'
#' @param x A data.frame (or tibble) with at least the required evidence
#'   columns.
#' @param ctx An optional [r4sub_run_context]. If provided, `run_id` and
#'   `study_id` are filled from the context when missing.
#' @param ... Additional columns to set (e.g., `asset_type = "validation"`).
#'
#' @return A data.frame conforming to the evidence schema.
#'
#' @examples
#' ctx <- r4sub_run_context("STUDY1", "DEV")
#' df <- data.frame(
#'   asset_type = "validation",
#'   asset_id = "ADSL",
#'   source_name = "pinnacle21",
#'   indicator_id = "P21-001",
#'   indicator_name = "Missing variable",
#'   indicator_domain = "quality",
#'   severity = "high",
#'   result = "fail",
#'   message = "Variable AGEU missing",
#'   stringsAsFactors = FALSE
#' )
#' ev <- as_evidence(df, ctx = ctx)
#'
#' @importFrom cli cli_alert_success
#' @export
as_evidence <- function(x, ctx = NULL, ...) {
  stopifnot(is.data.frame(x))
  schema <- evidence_schema()

  dots <- list(...)
  for (nm in names(dots)) {
    x[[nm]] <- dots[[nm]]
  }

  # Fill from context
  if (!is.null(ctx)) {
    if (!"run_id" %in% names(x) || all(is.na(x$run_id))) {
      x$run_id <- ctx$run_id
    }
    if (!"study_id" %in% names(x) || all(is.na(x$study_id))) {
      x$study_id <- ctx$study_id
    }
  }

  # Fill missing nullable columns
  for (col in names(schema)) {
    spec <- schema[[col]]
    if (!col %in% names(x)) {
      if (isTRUE(spec$nullable)) {
        x[[col]] <- switch(
          spec$type,
          "character" = NA_character_,
          "double"    = NA_real_,
          "POSIXct"   = as.POSIXct(NA),
          NA
        )
      }
    }
  }

  # Fill created_at if missing
  if (!"created_at" %in% names(x) || all(is.na(x$created_at))) {
    x$created_at <- Sys.time()
  }

  # Fill evidence_payload if NA
  if ("evidence_payload" %in% names(x)) {
    x$evidence_payload[is.na(x$evidence_payload)] <- "{}"
  }

  # Coerce types
  for (col in names(schema)) {
    spec <- schema[[col]]
    if (col %in% names(x)) {
      x[[col]] <- switch(
        spec$type,
        "character" = as.character(x[[col]]),
        "double"    = as.double(x[[col]]),
        "POSIXct"   = as.POSIXct(x[[col]]),
        x[[col]]
      )
    }
  }

  # Reorder to schema order
  schema_cols <- names(schema)
  extra_cols <- setdiff(names(x), schema_cols)
  x <- x[, c(intersect(schema_cols, names(x)), extra_cols), drop = FALSE]
  rownames(x) <- NULL

  validate_evidence(x)
  cli::cli_alert_success("Evidence table created: {nrow(x)} row{?s}")
  x
}

#' Validate Evidence Table
#'
#' Checks that a data.frame conforms to the evidence schema. Verifies column
#' presence, types, and controlled vocabulary values.
#'
#' @param ev A data.frame to validate.
#' @return `TRUE` invisibly if valid; throws an error otherwise.
#'
#' @examples
#' ctx <- suppressMessages(r4sub_run_context("STUDY1", "DEV"))
#' ev <- suppressMessages(as_evidence(
#'   data.frame(
#'     asset_type = "validation", asset_id = "ADSL",
#'     source_name = "pinnacle21", indicator_id = "SD0001",
#'     indicator_name = "SD0001", indicator_domain = "quality",
#'     severity = "high", result = "fail",
#'     stringsAsFactors = FALSE
#'   ),
#'   ctx = ctx
#' ))
#' validate_evidence(ev)
#'
#' @importFrom cli cli_abort
#' @export
validate_evidence <- function(ev) {
  stopifnot(is.data.frame(ev))
  schema <- evidence_schema()

  # Check required columns exist
  required <- names(schema)[!vapply(schema, function(s) isTRUE(s$nullable), logical(1))]
  missing_cols <- setdiff(required, names(ev))
  if (length(missing_cols) > 0) {
    cli::cli_abort(
      "Evidence table is missing required column{?s}: {.field {missing_cols}}"
    )
  }

  # Check all schema columns that exist have correct type
  for (col in intersect(names(schema), names(ev))) {
    spec <- schema[[col]]
    vals <- ev[[col]]

    # Type check
    type_ok <- switch(
      spec$type,
      "character" = is.character(vals),
      "double"    = is.numeric(vals),
      "POSIXct"   = inherits(vals, "POSIXct"),
      TRUE
    )
    if (!type_ok) {
      cli::cli_abort(
        "Column {.field {col}} must be {.cls {spec$type}}, got {.cls {class(vals)[1]}}"
      )
    }

    # Nullable check
    if (!isTRUE(spec$nullable) && any(is.na(vals))) {
      cli::cli_abort("Column {.field {col}} must not contain NA values")
    }

    # Allowed values check
    if (!is.null(spec$allowed)) {
      non_na <- vals[!is.na(vals)]
      bad <- setdiff(unique(non_na), spec$allowed)
      if (length(bad) > 0) {
        bad_str <- paste(bad, collapse = ", ")
        allowed_str <- paste(spec$allowed, collapse = ", ")
        cli::cli_abort(
          "Column {.field {col}} has invalid value(s): {bad_str}. Allowed: {allowed_str}"
        )
      }
    }
  }

  invisible(TRUE)
}

#' Bind Evidence Tables
#'
#' Row-binds multiple evidence data.frames after validating each one.
#'
#' @param ... Evidence data.frames to bind.
#' @return A single combined evidence data.frame.
#'
#' @examples
#' ctx <- suppressMessages(r4sub_run_context("STUDY1", "DEV"))
#' make_ev <- function(ind_id) {
#'   suppressMessages(as_evidence(
#'     data.frame(
#'       asset_type = "validation", asset_id = "ADSL",
#'       source_name = "pinnacle21", indicator_id = ind_id,
#'       indicator_name = ind_id, indicator_domain = "quality",
#'       severity = "low", result = "pass",
#'       stringsAsFactors = FALSE
#'     ),
#'     ctx = ctx
#'   ))
#' }
#' ev1 <- make_ev("IND-001")
#' ev2 <- make_ev("IND-002")
#' combined <- suppressMessages(bind_evidence(ev1, ev2))
#' nrow(combined)
#'
#' @importFrom cli cli_alert_success
#' @export
bind_evidence <- function(...) {
  frames <- list(...)
  for (i in seq_along(frames)) {
    validate_evidence(frames[[i]])
  }
  combined <- do.call(rbind, frames)
  rownames(combined) <- NULL
  cli::cli_alert_success("Bound {length(frames)} evidence table{?s}: {nrow(combined)} total row{?s}")
  combined
}

#' Summarize Evidence
#'
#' Returns a summary data.frame with counts grouped by domain, severity,
#' result, and source.
#'
#' @param ev A valid evidence data.frame.
#' @return A data.frame with columns: `indicator_domain`, `severity`, `result`,
#'   `source_name`, and `n`.
#'
#' @examples
#' ctx <- suppressMessages(r4sub_run_context("STUDY1", "DEV"))
#' ev <- suppressMessages(as_evidence(
#'   data.frame(
#'     asset_type = "validation", asset_id = "ADSL",
#'     source_name = "pinnacle21", indicator_id = "SD0001",
#'     indicator_name = "SD0001", indicator_domain = "quality",
#'     severity = "high", result = "fail",
#'     stringsAsFactors = FALSE
#'   ),
#'   ctx = ctx
#' ))
#' evidence_summary(ev)
#'
#' @export
evidence_summary <- function(ev) {
  validate_evidence(ev)

  # Base R aggregation using a separator unlikely to appear in data
  sep <- "\x01"
  groups <- interaction(
    ev$indicator_domain, ev$severity, ev$result, ev$source_name,
    drop = TRUE, sep = sep
  )
  counts <- as.data.frame(table(groups), stringsAsFactors = FALSE)
  if (nrow(counts) == 0) {
    return(data.frame(
      indicator_domain = character(0),
      severity = character(0),
      result = character(0),
      source_name = character(0),
      n = integer(0),
      stringsAsFactors = FALSE
    ))
  }
  parts <- strsplit(counts$groups, sep, fixed = TRUE)
  data.frame(
    indicator_domain = vapply(parts, `[`, character(1), 1),
    severity         = vapply(parts, `[`, character(1), 2),
    result           = vapply(parts, `[`, character(1), 3),
    source_name      = vapply(parts, `[`, character(1), 4),
    n                = as.integer(counts$Freq),
    stringsAsFactors = FALSE
  )
}

Any scripts or data that you put into this service are public.

r4subcore documentation built on Feb. 20, 2026, 5:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

r4subcore
Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

R/evidence_schema.R
In r4subcore: Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

Defines functions evidence_summary bind_evidence validate_evidence as_evidence canon_result canon_severity evidence_schema

Documented in as_evidence bind_evidence canon_result canon_severity evidence_schema evidence_summary validate_evidence

Try the r4subcore package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

r4subcore Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

R/evidence_schema.R In r4subcore: Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

Defines functions evidence_summary bind_evidence validate_evidence as_evidence canon_result canon_severity evidence_schema

Documented in as_evidence bind_evidence canon_result canon_severity evidence_schema evidence_summary validate_evidence

Try the r4subcore package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

r4subcore
Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness

R/evidence_schema.R
In r4subcore: Core Data Contracts, Parsers, and Scoring Primitives for Clinical Submission Readiness