R/trace_indicators.R

Defines functions trace_indicator_scores

Documented in trace_indicator_scores

#' Compute Trace Indicator Scores
#'
#' Computes summary metrics from evidence rows generated by
#' [trace_model_to_evidence()]. Returns key traceability indicators.
#'
#' @param evidence A data.frame of evidence rows (must contain `indicator_id`
#'   and `metric_value` columns).
#'
#' @return A tibble with columns: `indicator`, `value`, `description`.
#'
#' @examples
#' library(r4subcore)
#' ctx <- r4sub_run_context(study_id = "TEST001", environment = "DEV")
#' adam_meta <- data.frame(
#'   dataset = "ADSL", variable = c("STUDYID", "AGE", "AGEGR1"),
#'   label = c("Study ID", "Age", "Age Group")
#' )
#' sdtm_meta <- data.frame(
#'   dataset = "DM", variable = c("STUDYID", "AGE"),
#'   label = c("Study ID", "Age")
#' )
#' map <- data.frame(
#'   adam_dataset = "ADSL", adam_var = c("STUDYID", "AGE"),
#'   sdtm_domain = "DM",   sdtm_var = c("STUDYID", "AGE")
#' )
#' tm <- build_trace_model(adam_meta, sdtm_meta, mapping = map)
#' ev <- trace_model_to_evidence(tm, ctx = ctx)
#' trace_indicator_scores(ev)
#'
#' @export
trace_indicator_scores <- function(evidence) {
  if (!is.data.frame(evidence)) {
    cli::cli_abort("{.arg evidence} must be a data.frame.")
  }

  # Extract trace level rows
  level_rows <- evidence[evidence$indicator_id == "TRACE_LEVEL", , drop = FALSE]

  # Extract diagnostic rows
  orphan_rows <- evidence[evidence$indicator_id == "TRACE_ORPHAN_VAR", , drop = FALSE]
  ambig_rows  <- evidence[evidence$indicator_id == "TRACE_AMBIGUOUS_MAPPING", , drop = FALSE]

  n_vars <- nrow(level_rows)
  orphan_count <- nrow(orphan_rows)
  ambig_count  <- nrow(ambig_rows)

  if (n_vars > 0L) {
    levels <- level_rows$metric_value
    coverage_L2plus <- mean(levels >= 2, na.rm = TRUE)
    coverage_L3plus <- mean(levels >= 3, na.rm = TRUE)
    mean_level      <- mean(levels, na.rm = TRUE)
  } else {
    coverage_L2plus <- NA_real_
    coverage_L3plus <- NA_real_
    mean_level      <- NA_real_
  }

  tibble::tibble(
    indicator = c(
      "TRACE_VAR_COVERAGE_L2PLUS",
      "TRACE_VAR_COVERAGE_L3PLUS",
      "TRACE_ORPHAN_VAR_COUNT",
      "TRACE_AMBIGUOUS_MAPPING_COUNT",
      "TRACE_MEAN_TRACE_LEVEL"
    ),
    value = c(
      coverage_L2plus,
      coverage_L3plus,
      as.numeric(orphan_count),
      as.numeric(ambig_count),
      mean_level
    ),
    description = c(
      "Proportion of ADaM variables with trace level >= L2",
      "Proportion of ADaM variables with trace level >= L3",
      "Number of orphan ADaM variables with no SDTM mapping",
      "Number of ADaM variables mapped to multiple SDTM sources",
      "Mean trace level across all ADaM variables"
    )
  )
}

Try the r4subtrace package in your browser

Any scripts or data that you put into this service are public.

r4subtrace documentation built on March 4, 2026, 1:07 a.m.