# Nothing
## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults, set in two calls grouped by purpose.
# Text-output options:
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# Default figure dimensions (the DAG chunks further down pass their own
# fig.width / fig.height in the chunk header):
knitr::opts_chunk$set(fig.width = 7, fig.height = 5)
## ----setup--------------------------------------------------------------------
library(xplainfi)
library(DiagrammeR)
library(mlr3learners)
# Fix the RNG state up front; several example chunks below re-seed with the
# same value right before simulating.
set.seed(seed = 123)
## ----overview-table, echo=FALSE-----------------------------------------------
# Summary table: one row per simulated DGP, listing the challenge it poses and
# the behavior noted for PFI and CFI. The columns are assembled inside local()
# so the helper vectors do not linger in the document's environment.
dgp_overview <- local({
  dgp_names <- c(
    "sim_dgp_correlated",
    "sim_dgp_mediated",
    "sim_dgp_confounded",
    "sim_dgp_interactions",
    "sim_dgp_independent",
    "sim_dgp_ewald"
  )
  challenges <- c(
    "Spurious correlation",
    "Mediation effects",
    "Confounding",
    "Interaction effects",
    "Baseline (no challenges)",
    "Mixed effects"
  )
  pfi_behavior <- c(
    "High for spurious x2",
    "Shows total effects",
    "Biased upward",
    "Low (no main effects)",
    "Accurate",
    "Mixed"
  )
  cfi_behavior <- c(
    "Low for spurious x2",
    "Shows direct effects",
    "Less biased",
    "High (captures interactions)",
    "Accurate",
    "Mixed"
  )
  # check.names = FALSE keeps the space in the two "... Behavior" headers.
  data.frame(
    DGP = dgp_names,
    Challenge = challenges,
    `PFI Behavior` = pfi_behavior,
    `CFI Behavior` = cfi_behavior,
    check.names = FALSE
  )
})
knitr::kable(
  x = dgp_overview,
  caption = "Overview of simulation settings and expected method behavior"
)
## ----dag-correlated, echo=FALSE, fig.cap="DAG for correlated features DGP", fig.width=10, fig.height=4----
# DAG for the correlated-features DGP. The DOT source is assembled line by
# line and joined with newlines so each section can carry an R comment; the
# leading "" reproduces the newline that opens the specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Correlated {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.5];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes, labelled with their coefficients
      "X1 [fillcolor='lightcoral', label='X₁\n(β=2.0)'];",
      "X2 [fillcolor='pink', label='X₂\n(β=0)'];",
      "X3 [fillcolor='lightblue', label='X₃\n(β=1.0)'];",
      "X4 [fillcolor='lightgray', label='X₄\n(β=0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      # Edges: the X1-X2 correlation, then the paths into Y
      "X1 -> X2 [color=red, style=bold, label='r≈0.9'];",
      "X1 -> Y [label='2.0'];",
      "X2 -> Y [style=dashed, color=gray, label='0'];",
      "X3 -> Y [label='1.0'];",
      "X4 -> Y [style=dashed, color=gray];",
      # Rank constraints
      "{rank=source; X1; X3; X4}",
      "{rank=same; X2}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----correlated-example-------------------------------------------------------
# Simulate 500 observations from the correlated-features DGP, seeding first so
# the printed correlation is reproducible.
set.seed(seed = 123)
task <- sim_dgp_correlated(n = 500)
# Empirical correlation matrix of x1 and x2
cor(x = task$data()[, c("x1", "x2")])
# Ground truth: the coefficients are x1 = 2.0, x2 = 0, x3 = 1.0, x4 = 0.
# x2 therefore has NO causal effect, despite its strong correlation with x1.
## ----dag-mediated, echo=FALSE, fig.cap="DAG for mediated effects DGP", fig.width=10, fig.height=4----
# DAG for the mediated-effects DGP. The DOT source is assembled line by line
# and joined with newlines; the leading "" reproduces the newline that opens
# the specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Mediated {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.2];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes, labelled with their coefficients
      "E [fillcolor='orange', label='Exposure\n(β=0)'];",
      "D [fillcolor='lightblue', label='Direct\n(β=0.5)'];",
      "M [fillcolor='yellow', label='Mediator\n(β=1.5)'];",
      "N [fillcolor='lightgray', label='Noise\n(β=0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      # Edges: paths into the mediator, then paths into Y
      "E -> M [label='0.8', color=purple, penwidth=2];",
      "D -> M [label='0.6', color=blue];",
      "D -> Y [label='0.5', color=blue];",
      "M -> Y [label='1.5', color=purple, penwidth=2];",
      "N -> Y [style=dashed, color=gray];",
      # Rank constraints
      "{rank=source; E; D; N}",
      "{rank=same; M}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----mediated-example---------------------------------------------------------
# Simulate 500 observations from the mediated-effects DGP (seeded).
set.seed(seed = 123)
task <- sim_dgp_mediated(n = 500)
# Effect of the exposure on Y, read off the path coefficients (nothing is
# computed here):
#   total effect  = 0.8 * 1.5 = 1.2 (through the mediator)
#   direct effect = 0 (no direct path to Y)
## ----dag-confounded, echo=FALSE, fig.cap="DAG for confounding DGP", fig.width=10, fig.height=5----
# DAG for the confounding DGP. The DOT source is assembled line by line and
# joined with newlines; the leading "" reproduces the newline that opens the
# specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Confounded {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.2, nodesep=0.8];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes: the confounder H, then the features and the outcome
      "H [fillcolor='red', label='H\n(Confounder)', style='filled,dashed'];",
      "X1 [fillcolor='lightcoral', label='X₁\n(β=1.0)'];",
      "P [fillcolor='pink', label='Proxy\n(β=0)'];",
      "I [fillcolor='lightblue', label='Independent\n(β=1.0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      # Edges: everything H feeds into, then the feature paths into Y
      "H -> X1 [color=red, label='1.0'];",
      "H -> P [color=red, style=dashed, label='1.0'];",
      "H -> Y [color=red, label='1.0', penwidth=2];",
      "X1 -> Y [label='1.0'];",
      "I -> Y [label='1.0'];",
      # Rank constraints
      "{rank=source; H}",
      "{rank=same; X1; P; I}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----confounded-example-------------------------------------------------------
# Draw both variants of the confounded DGP from a single seed; the order of
# the two simulations is kept because they share the RNG stream.
set.seed(seed = 123)
# Variant 1 (the default): confounder hidden
task_hidden <- sim_dgp_confounded(n = 500, hidden = TRUE)
task_hidden$feature_names # proxy available but not confounder
# Variant 2: confounder observable
task_observed <- sim_dgp_confounded(n = 500, hidden = FALSE)
task_observed$feature_names # both confounder and proxy available
## ----dag-interactions, echo=FALSE, fig.cap="DAG for interaction effects DGP", fig.width=10, fig.height=4----
# DAG for the interaction-effects DGP. The DOT source is assembled line by
# line and joined with newlines; the leading "" reproduces the newline that
# opens the specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Interaction {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.2];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes: features, outcome, and the diamond-shaped interaction term
      "X1 [fillcolor='orange', label='X₁\n(β=0)'];",
      "X2 [fillcolor='orange', label='X₂\n(β=0)'];",
      "X3 [fillcolor='lightblue', label='X₃\n(β=1.0)'];",
      "N1 [fillcolor='lightgray', label='N₁\n(β=0)'];",
      "N2 [fillcolor='lightgray', label='N₂\n(β=0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      "INT [fillcolor='red', shape=diamond, label='X₁×X₂\n(β=2.0)', width=1.5];",
      # Edges: X1 and X2 reach Y only via the interaction node
      "X1 -> INT [color=red, penwidth=2];",
      "X2 -> INT [color=red, penwidth=2];",
      "INT -> Y [color=red, label='2.0', penwidth=2];",
      "X3 -> Y [label='1.0'];",
      "N1 -> Y [style=dashed, color=gray];",
      "N2 -> Y [style=dashed, color=gray];",
      # Rank constraints
      "{rank=source; X1; X2; X3; N1; N2}",
      "{rank=same; INT}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----interactions-example-----------------------------------------------------
# Simulate 500 observations from the interaction-effects DGP (seeded).
set.seed(seed = 123)
task <- sim_dgp_interactions(n = 500)
# Reminder: X1 and X2 carry NO main effects here --
# any importance they show arises ONLY through their interaction.
## ----dag-independent, echo=FALSE, fig.cap="DAG for independent features DGP", fig.width=10, fig.height=4----
# DAG for the independent-features (baseline) DGP. The DOT source is assembled
# line by line and joined with newlines; the leading "" reproduces the newline
# that opens the specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Independent {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.5];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes, labelled with their coefficients
      "X1 [fillcolor='lightblue', label='X₁\n(β=2.0)'];",
      "X2 [fillcolor='lightblue', label='X₂\n(β=1.0)'];",
      "X3 [fillcolor='lightblue', label='X₃\n(β=0.5)'];",
      "N1 [fillcolor='lightgray', label='N₁\n(β=0)'];",
      "N2 [fillcolor='lightgray', label='N₂\n(β=0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      # Edges: every node points straight at Y
      "X1 -> Y [label='2.0', penwidth=3];",
      "X2 -> Y [label='1.0', penwidth=2];",
      "X3 -> Y [label='0.5'];",
      "N1 -> Y [style=dashed, color=gray];",
      "N2 -> Y [style=dashed, color=gray];",
      # Rank constraints
      "{rank=source; X1; X2; X3; N1; N2}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----independent-example------------------------------------------------------
# Simulate 500 observations from the independent-features baseline (seeded).
set.seed(seed = 123)
task <- sim_dgp_independent(n = 500)
# Expected outcome: all methods should rank the features consistently, i.e.
# important1 > important2 > important3 > unimportant1,2 (approx. 0)
## ----dag-ewald, echo=FALSE, fig.cap="DAG for Ewald et al. (2024) DGP", fig.width=10, fig.height=4----
# DAG for the Ewald et al. (2024) DGP. The DOT source is assembled line by
# line and joined with newlines; the leading "" reproduces the newline that
# opens the specification.
grViz(
  diagram = paste(
    c(
      "",
      "digraph Ewald {",
      # Layout and shared node styling
      "rankdir=LR;",
      "graph [ranksep=1.2];",
      "node [shape=circle, style=filled, fontsize=14, width=1.2];",
      # Nodes: features, outcome, and the diamond-shaped interaction term
      "X1 [fillcolor='lightgray', label='X₁\n(β=0)'];",
      "X2 [fillcolor='lightgray', label='X₂\n(β=0)'];",
      "X3 [fillcolor='lightgray', label='X₃\n(β=0)'];",
      "X4 [fillcolor='lightblue', label='X₄\n(β=1.0)'];",
      "X5 [fillcolor='lightblue', label='X₅\n(β=1.0)'];",
      "Y [fillcolor='greenyellow', label='Y', width=1.5];",
      "INT [fillcolor='red', shape=diamond, label='X₄×X₅\n(β=1.0)', width=1.5];",
      # Edges between features
      "X1 -> X2 [color=gray, label='≈1.0'];",
      "X3 -> X4 [color=gray, label='≈1.0'];",
      # Edges into Y: X4 and X5 directly, and via their interaction
      "X4 -> Y [label='1.0'];",
      "X5 -> Y [label='1.0'];",
      "X4 -> INT [color=red];",
      "X5 -> INT [color=red];",
      "INT -> Y [color=red, label='1.0'];",
      # Rank constraints
      "{rank=source; X1; X3; X5}",
      "{rank=same; X2; X4}",
      "{rank=same; INT}",
      "{rank=sink; Y}",
      "}"
    ),
    collapse = "\n"
  )
)
## ----ewald-example------------------------------------------------------------
# Simulate 500 observations from the Ewald et al. (2024) DGP and print the
# resulting task. Seed first, as the other example chunks do, so the chunk
# gives the same draw when run on its own rather than depending on whatever
# RNG state earlier chunks left behind.
set.seed(123)
sim_dgp_ewald(n = 500)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.