Nothing
# ──────────────────────────────────────────────────────────────────────────────
# ─────────────────────────── Public API ──────────────────────────────────────
# ──────────────────────────────────────────────────────────────────────────────
#' Run the TPC Algorithm for Causal Discovery
#'
#' @description
#' Run a tier-aware variant of the PC algorithm that respects background
#' knowledge about a partial temporal order. Supply the temporal order via a
#' `Knowledge` object.
#'
#' @param data A data frame with the observed variables.
#' @param knowledge A `Knowledge` object created with [knowledge()],
#' encoding tier assignments and optional forbidden/required edges. This is
#' the preferred way to provide temporal background knowledge.
#' @param alpha The alpha level used as the per-test significance
#' threshold for conditional independence testing.
#' @param test A conditional independence test. The default [reg_test()]
#' uses a regression-based information-loss test. Another available option is
#' [cor_test()] which tests for vanishing partial correlations. User-supplied
#' functions may also be used; see details for the required interface.
#' @param suff_stat A sufficient statistic. If supplied, it is passed directly
#' to the test and no statistics are computed from \code{data}. Its structure
#' depends on the chosen \code{test}.
#' @param method Skeleton construction method, one of \code{"stable"},
#' \code{"original"}, or \code{"stable.fast"} (default). See
#' [pcalg::skeleton()] for details.
#' @param na_method Handling of missing values, one of \code{"none"} (default;
#' error on any \code{NA}), \code{"cc"} (complete-case analysis), or
#' \code{"twd"} (test-wise deletion).
#' @param orientation_method Conflict-handling method when orienting edges.
#' Currently only the conservative method is available.
#' @param directed_as_undirected Logical; if \code{TRUE}, treat any directed
#' edges in \code{knowledge} as undirected during skeleton learning. This
#' is due to the fact that \pkg{pcalg} does not allow directed edges in
#' \code{fixedEdges} or \code{fixedGaps}. Default is \code{FALSE}.
#' @param varnames Character vector of variable names. Only needed when
#' \code{data} is not supplied and all information is passed via
#' \code{suff_stat}.
#' @param num_cores Integer number of CPU cores to use for parallel skeleton learning.
#' @param ... Additional arguments passed to
#' [pcalg::skeleton()] during skeleton construction.
#'
#' @details
#' Any independence test implemented in \pkg{pcalg} may be used; see
#' [pcalg::pc()]. When \code{na_method = "twd"}, test-wise deletion is
#' performed: for [cor_test()], each pairwise correlation uses complete cases;
#' for [reg_test()], each conditional test performs its own deletion. If you
#' supply a user-defined \code{test}, you must also provide \code{suff_stat}.
#'
#' Temporal or tiered knowledge enters in two places:
#' \itemize{
#' \item during skeleton estimation, candidate conditioning sets are pruned so
#' they do not contain variables that are strictly after both endpoints;
#' \item during orientation, any cross-tier edge is restricted to point
#' forward in time.
#' }
#'
#' @inheritSection disco_note Recommendation
#' @inheritSection disco_algs_return_doc_pdag Value
#'
#' @example inst/roxygen-examples/tpc-example.R
#'
#' @export
tpc_run <- function(
data = NULL,
knowledge = NULL,
alpha = 0.05,
test = reg_test,
suff_stat = NULL,
method = "stable.fast",
na_method = "none",
orientation_method = "conservative",
directed_as_undirected = FALSE,
varnames = NULL,
num_cores = 1,
...
) {
prep <- constraint_based_prepare_inputs(
data = data,
knowledge = knowledge,
varnames = varnames,
na_method = na_method,
test = test,
suff_stat = suff_stat,
directed_as_undirected = directed_as_undirected,
function_name = "tpc"
)
# unpack returned values
data <- prep$data
knowledge <- prep$knowledge
vnames <- prep$vnames
suff_stat <- prep$suff_stat
na_method <- prep$na_method
directed_as_undirected <- prep$directed_as_undirected
test <- prep$internal_test # Ensure we use the internal test with camelCase so it works downstream with pcalg
knowledge <- prepare_knowledge(knowledge) # Precompute variable ranks for efficient access
# check orientation method
if (!(orientation_method %in% c("standard", "conservative", "maj.rule"))) {
stop(
"Orientation method must be one of standard, conservative or maj.rule."
)
}
# CI test that forbids conditioning on future tiers
indep_test_dir <- dir_test(test, vnames, knowledge)
# pcalg background constraints (forbidden/required) from knowledge
constraints <- .pcalg_constraints_from_knowledge(
knowledge,
labels = vnames,
directed_as_undirected = directed_as_undirected
)
# learn skeleton
skel <- pcalg::skeleton(
suffStat = suff_stat,
indepTest = indep_test_dir,
alpha = alpha,
labels = vnames,
method = method,
fixedGaps = constraints$fixed_gaps,
fixedEdges = constraints$fixed_edges,
numCores = num_cores,
...
)
ntests <- sum(skel@n.edgetests)
res <- tpdag(skel, knowledge = knowledge)
# caugi assumes rows are the "from" nodes and columns the "to" nodes, so transpose.
cg <- caugi::as_caugi(t(res), collapse = TRUE, class = "UNKNOWN")
as_disco(cg, knowledge)
}
# ──────────────────────────────────────────────────────────────────────────────
# ──────────────────────────── Helpers ────────────────────────────────────────
# ──────────────────────────────────────────────────────────────────────────────
#' Build tiered knowledge from legacy order prefixes
#'
#' @description
#' Helper that converts a character \code{order} of prefixes into a
#' `Knowledge` object by creating one tier per prefix and assigning
#' variables with \code{tidyselect::starts_with("<prefix>")}.
#'
#' @param order Character vector of prefixes in temporal order.
#' @param data Optional data frame used to freeze the knowledge variable set.
#' @param vnames Optional character vector of variable names when \code{data}
#' is not supplied.
#'
#' @example inst/roxygen-examples/dot-build_knowledge_from_order-example.R
#'
#' @return A `Knowledge` object with tiers matching \code{order}.
#' @keywords internal
#' @noRd
.build_knowledge_from_order <- function(order, data = NULL, vnames = NULL) {
.check_if_pkgs_are_installed(
pkgs = c(
"rlang",
"tidyselect",
"utils"
),
function_name = ".build_knowledge_from_order"
)
checkmate::assert_character(order, min.len = 1)
# build tier specs like: "<lbl>" ~ starts_with("<lbl>")
fmls <- lapply(order, function(lbl) {
rlang::new_formula(
lhs = rlang::expr(!!lbl),
rhs = rlang::expr(tidyselect::starts_with(!!lbl)),
env = rlang::empty_env()
)
})
# if data is provided, delegate to knowledge() with tier() rules
if (!is.null(data)) {
return(rlang::inject(knowledge(data, tier(!!!fmls))))
}
# otherwise, build a bare `Knowledge` object from variable names
if (is.null(vnames) && is.null(data)) {
stop("`data` is NULL, so `vnames` should be provided.")
}
kn <- knowledge() |> add_vars(vnames)
# create tiers in declared order
for (lbl in order) {
if (nrow(kn$tiers) == 0L) {
kn <- add_tier(kn, !!lbl)
} else {
last <- utils::tail(kn$tiers$label, 1)
kn <- rlang::inject(add_tier(kn, !!lbl, after = !!last))
}
}
# assign tiers by prefix match; do not overwrite earlier assignments
for (lbl in order) {
hits <- startsWith(vnames, lbl)
if (any(hits)) {
idx <- match(vnames[hits], kn$vars$var)
unassigned <- is.na(kn$vars$tier[idx])
if (any(unassigned)) {
kn$vars$tier[idx[unassigned]] <- lbl
}
}
}
kn
}
#' Temporally orient unshielded colliders
#'
#' @description
#' Given a CPDAG adjacency matrix and separation sets, orient v-structures
#' that do not contradict the current directions, respecting temporal tiering.
#'
#' @param amat Square adjacency matrix (from-to convention).
#' @param sepsets Separation sets as computed by \pkg{pcalg}.
#'
#' @example inst/roxygen-examples/v_orient_temporal-example.R
#'
#' @return The updated adjacency matrix with additional arrowheads.
#' @keywords internal
#' @noRd
v_orient_temporal <- function(amat, sepsets) {
.check_if_pkgs_are_installed(
pkgs = c(
"gtools"
),
function_name = "v_orient_temporal"
)
vnames <- rownames(amat) # TODO: not used
nvar <- nrow(amat)
for (i in 1:nvar) {
theseAdj <- find_adjacencies(amat, i)
# if there are at least two adjacent nodes
if (length(theseAdj) >= 2) {
adjpairs <- gtools::combinations(length(theseAdj), 2, v = theseAdj)
npairs <- nrow(adjpairs)
if (npairs >= 1) {
for (j in 1:npairs) {
thisPair <- adjpairs[j, ]
j1 <- thisPair[1]
j2 <- thisPair[2]
thisPairAdj <- j2 %in% find_adjacencies(amat, j1)
# if pair is not adjacent (unmarried)
if (!thisPairAdj) {
sepset1 <- sepsets[[j1]][[j2]]
sepset2 <- sepsets[[j2]][[j1]]
# if middle node is not a separator of two other nodes
if (!(i %in% sepset1) && !(i %in% sepset2)) {
# if this does not contradict directional information
# already in the graph
if (amat[i, j1] == 1 && amat[i, j2] == 1) {
amat[j1, i] <- 0
amat[j2, i] <- 0
}
}
}
}
}
}
}
amat
}
#' Find adjacencies of a node in an adjacency matrix
#'
#' @param amatrix Square adjacency matrix.
#' @param index Integer index of the node.
#'
#' @example inst/roxygen-examples/find_adjacencies-example.R
#'
#' @return Integer vector of adjacent node indices.
#' @keywords internal
#' @noRd
find_adjacencies <- function(amatrix, index) {
union(
which(as.logical(amatrix[index, ])),
which(as.logical(amatrix[, index]))
)
}
#' Compute tier indices for variables
#'
#' @description
#' Map variable names to their tier ranks according to a \code{knowledge}
#' object. Variables without a tier receive \code{NA}.
#'
#' @param kn A `Knowledge` object.
#' @param vnames Character vector of variable names.
#'
#' @example inst/roxygen-examples/dot-tier_index-example.R
#'
#' @return Named integer vector of the same length as \code{vnames}.
#' @keywords internal
#' @noRd
.tier_index <- function(kn, vnames) {
is_knowledge(kn)
idx <- match(vnames, kn$vars$var)
tiers <- kn$vars$tier[idx]
rank <- match(tiers, kn$tiers$label)
stats::setNames(rank, vnames)
}
prepare_knowledge <- function(kn) {
is_knowledge(kn)
# Direct variable -> tier rank mapping
kn$.__var_rank <- stats::setNames(
match(kn$vars$tier, kn$tiers$label),
kn$vars$var
)
kn
}
#' Directed indepTest wrapper that forbids conditioning on the future
#'
#' @description
#' Wrap a conditional independence test so that conditioning sets that are
#' strictly after both endpoints are rejected, implementing the temporal
#' restriction during skeleton learning.
#'
#' @param test A function \code{f(x, y, S, suff_stat)} returning a p-value or
#' test statistic compatible with \pkg{pcalg}.
#' @param vnames Character vector of variable names (labels).
#' @param knowledge A `Knowledge` object.
#'
#' @example inst/roxygen-examples/dir_test-example.R
#'
#' @return A function with the same interface as \code{test}.
#' @keywords internal
#' @noRd
dir_test <- function(test, vnames, knowledge) {
vr <- knowledge$.__var_rank
function(x, y, conditioning_set, suff_stat) {
snames <- vnames[conditioning_set]
x_rank <- vr[[vnames[x]]]
y_rank <- vr[[vnames[y]]]
if (length(snames) && !is.na(x_rank) && !is.na(y_rank)) {
for (s in snames) {
s_rank <- vr[[s]]
if (
!is.na(s_rank) &&
s_rank > x_rank &&
s_rank > y_rank
) {
return(0)
}
}
}
do.call(
test,
list(x = x, y = y, S = conditioning_set, suffStat = suff_stat)
)
}
}
#' Convert knowledge to \pkg{pcalg} constraints
#'
#' @description
#' Turn directed forbidden/required edges into undirected \code{fixedGaps} and
#' \code{fixedEdges} matrices in the supplied \code{labels} order. Tier
#' annotations are ignored here; use \code{order_restrict_amat_cpdag()} for
#' tier-based pruning.
#'
#' @param kn A `Knowledge` object.
#' @param labels Character vector of variable names in the desired order.
#'
#' @example inst/roxygen-examples/dot-pcalg_constraints_from_knowledge-example.R
#'
#' @return A list with logical matrices \code{fixedGaps} and \code{fixedEdges}.
#' @keywords internal
#' @noRd
.pcalg_constraints_from_knowledge <- function(
kn,
labels,
directed_as_undirected
) {
kn_undirected <- kn
kn_undirected$vars$tier <- NA_character_
as_pcalg_constraints(
kn_undirected,
labels = labels,
directed_as_undirected = TRUE
)
}
#' Remove disallowed backward edges across tiers
#'
#' @description
#' Apply tier constraints to a CPDAG adjacency matrix by zeroing any entry that
#' points from a later tier into an earlier tier.
#'
#' @param amat Square adjacency matrix (from-to convention).
#' @param knowledge A `Knowledge` object with tier labels.
#'
#' @example inst/roxygen-examples/order_restrict_amat_cpdag-example.R
#'
#' @return The pruned adjacency matrix.
#' @keywords internal
#' @noRd
order_restrict_amat_cpdag <- function(amat, knowledge) {
p <- nrow(amat)
vnames <- rownames(amat)
tr <- .tier_index(knowledge, vnames)
if (all(is.na(tr))) {
return(amat)
}
for (i in seq_len(p)) {
for (j in seq_len(p)) {
if (!is.na(tr[i]) && !is.na(tr[j]) && tr[i] > tr[j]) {
amat[j, i] <- 0
}
}
}
amat
}
#' Orient a CPDAG under temporal background knowledge
#'
#' @description
#' Take a learned skeleton and apply tier-based pruning and v-structure
#' orientation, then delegate to \code{pcalg::addBgKnowledge()} for final
#' orientation under background knowledge.
#'
#' @param skel A [pcalg::pcAlgo-class] skeleton result.
#' @param knowledge A `Knowledge` object with tiers (and optionally edges).
#'
#' @example inst/roxygen-examples/tpdag-example.R
#'
#' @return A [pcalg::pcAlgo-class] object with an oriented graph.
#' @keywords internal
#' @noRd
tpdag <- function(skel, knowledge) {
.check_if_pkgs_are_installed(
pkgs = c(
"pcalg"
),
function_name = "tpdag"
)
cg <- caugi::as_caugi(skel@graph, collapse = TRUE, class = "UNKNOWN")
amat <- caugi::as_adjacency(cg)
skel_amat <- order_restrict_amat_cpdag(
amat,
knowledge = knowledge
)
pcalg::addBgKnowledge(
v_orient_temporal(skel_amat, skel@sepset),
checkInput = FALSE
)
}
#' Construct sufficient statistics for built-in CI tests
#'
#' @description
#' Build the \emph{sufficient statistic} object expected by the built-in
#' conditional independence tests. Supports:
#' \itemize{
#' \item \code{type = "reg_test"} - returns the original \code{data} and a
#' logical indicator of which variables are binary;
#' \item \code{type = "cor_test"} - returns a pairwise-complete correlation
#' matrix and the sample size \code{n}.
#' }
#'
#' @param data A data frame (or numeric matrix) of variables used by the test.
#' Columns are variables; rows are observations.
#' @param type A string selecting the test family. Must be either
#' \code{"reg_test"} or \code{"cor_test"}.
#' @param ... currently ignored.
#'
#' @details
#' For \code{type = "reg_test"}, the return value is a list with elements:
#' \itemize{
#' \item \code{data} - the original \code{data} object;
#' \item \code{binary} - a logical vector (one per column) indicating whether
#' the variable is binary.
#' }
#'
#' For \code{type = "cor_test"}, the return value is a list with elements:
#' \itemize{
#' \item \code{C} - the correlation matrix computed with
#' \code{use = "pairwise.complete.obs"};
#' \item \code{n} - the number of rows in \code{data}.
#' }
#'
#' Any other \code{type} results in an error.
#'
#' @example inst/roxygen-examples/make_suff_stat-example.R
#'
#' @return A list whose structure depends on \code{type}, suitable for passing
#' as \code{suff_stat} to the corresponding test.
#'
#' @keywords internal
#' @noRd
make_suff_stat <- function(data, type, ...) {
.check_if_pkgs_are_installed(
pkgs = c(
"stats"
),
function_name = "make_suff_stat"
)
if (type == "reg_test") {
bin <- unlist(sapply(
data,
function(x) length(unique(stats::na.omit(x))) == 2
))
suff <- list(data = data, binary = bin)
} else if (type == "cor_test") {
suff <- list(
C = stats::cor(data, use = "pairwise.complete.obs"),
n = nrow(data)
)
} else {
stop(paste(
type,
"is not a supported type for autogenerating a sufficient statistic."
))
}
suff
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.