# Nothing
## ----setup, include = FALSE---------------------------------------------------
# Default knitr chunk options for the whole document: collapsed output,
# "#>" as the output prefix, and 7 x 5 figures.
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  fig.width = 7, fig.height = 5
)
## ----load-libraries-----------------------------------------------------------
library(xplainfi)
library(mlr3)
library(mlr3learners)
library(data.table)
# Fix the RNG seed before anything random happens, so that the simulated task
# and the sampled values printed below are repeatable from run to run (as far
# as the samplers draw from R's global RNG). The seed value itself is arbitrary.
set.seed(123)
# Tasks used for demonstration throughout the script:
# - task_mixed: the "penguins" task, used below for the examples that involve
#   categorical features
# - task_numeric: a simulated task with n = 200 observations
task_mixed = tsk("penguins")
task_numeric = sim_dgp_correlated(n = 200)
## ----feature-types------------------------------------------------------------
# Show the feature types of the task itself ...
task_mixed$feature_types
# ... and the feature types reported by a marginal permutation sampler
# constructed on that task
permutation = MarginalPermutationSampler$new(task_mixed)
permutation$feature_types
## ----sample-methods-----------------------------------------------------------
# Variant 1: sample from the task stored in the sampler, addressing the
# observations by row id
sampled_task = permutation$sample(feature = "bill_length", row_ids = 40:45)
sampled_task
# Variant 2: sample for "external" data passed in by the caller (here simply
# the same rows pulled out of the task)
test_data = task_mixed$data(rows = 40:45)
sampled_external = permutation$sample_newdata(
  feature = "bill_length", newdata = test_data
)
sampled_external
## ----permutation-example------------------------------------------------------
# Permutation sampler on the mixed-type task
permutation = MarginalPermutationSampler$new(task_mixed)
# First ten rows: untouched data and the same rows with the continuous
# feature bill_length sampled
original = task_mixed$data(rows = seq_len(10))
sampled = permutation$sample("bill_length", row_ids = seq_len(10))
# Put original and sampled values side by side; sex was not sampled, so it is
# taken from the original rows
data.table(
  original_bill = original[["bill_length"]],
  sampled_bill = sampled[["bill_length"]],
  sex = original[["sex"]]
)
## ----marginal-ref-example-----------------------------------------------------
# Marginal reference sampler with a reference pool of n_samples = 30
marginal_ref = MarginalReferenceSampler$new(task_mixed, n_samples = 30L)
# Every row receives its value from a randomly sampled reference row
original = task_mixed$data(rows = seq_len(5))
sampled = marginal_ref$sample("bill_length", row_ids = seq_len(5))
# Original next to sampled; sex was not sampled and comes from the original rows
data.table(
  original_bill = original[["bill_length"]],
  sampled_bill = sampled[["bill_length"]],
  sex = original[["sex"]]
)
## ----correlation-preservation-------------------------------------------------
# Jointly sample x1 and x2 with the permutation sampler (breaks correlations)
perm = MarginalPermutationSampler$new(task_numeric)
sampled_perm = perm$sample(c("x1", "x2"), row_ids = seq_len(10))
# Jointly sample x1 and x2 with the reference sampler (preserves within-row
# correlations)
ref = MarginalReferenceSampler$new(task_numeric, n_samples = 50L)
sampled_ref = ref$sample(c("x1", "x2"), row_ids = seq_len(10))
# Correlation between x1 and x2 in each data set.
# NOTE(review): cor_original is computed from task_numeric$data() with no row
# subset, whereas the other two are computed from the 10 sampled rows only.
cor_original = with(task_numeric$data(), cor(x1, x2))
cor_perm = cor(sampled_perm[["x1"]], sampled_perm[["x2"]])
cor_ref = cor(sampled_ref[["x1"]], sampled_ref[["x2"]])
data.table(
  method = c("Original", "Permutation", "Reference"),
  correlation = c(cor_original, cor_perm, cor_ref)
)
## ----gaussian-sampler---------------------------------------------------------
# Gaussian conditional sampler on the simulated task
gaussian = ConditionalGaussianSampler$new(task_numeric)
# Untouched first ten rows, kept for comparison
original = task_numeric$data(rows = seq_len(10))
# Sample x1 for the same rows, conditioning on the remaining features
sampled = gaussian$sample(
  feature = "x1", row_ids = seq_len(10),
  conditioning_set = c("x2", "x3", "x4")
)
# Original vs. conditionally sampled x1; x2 is a conditioning feature and is
# shown from the original rows
data.table(
  original = original[["x1"]],
  sampled = sampled[["x1"]],
  x2 = original[["x2"]]
)
## ----arf-sampler--------------------------------------------------------------
# ARF sampler; used here on the full task including its categorical features
arf = ConditionalARFSampler$new(task_mixed, num_trees = 20, verbose = FALSE)
# Sample the island feature conditioned on two body measurements
sampled = arf$sample(
  feature = "island", row_ids = seq_len(10),
  conditioning_set = c("bill_length", "body_mass")
)
# Original vs. sampled island, together with the two conditioning features
# taken from the original rows
original = task_mixed$data(rows = seq_len(10))
data.table(
  original_island = original[["island"]],
  sampled_island = sampled[["island"]],
  bill_length = original[["bill_length"]],
  body_mass = original[["body_mass"]]
)
## ----ctree-sampler------------------------------------------------------------
# ctree sampler constructed with its default parameters
ctree = ConditionalCtreeSampler$new(task_mixed)
# Untouched first ten rows, kept for comparison
original = task_mixed$data(rows = seq_len(10))
# Sample bill_length for the same rows, conditioning on island only
sampled = ctree$sample(
  feature = "bill_length", row_ids = seq_len(10),
  conditioning_set = "island"
)
# Conditioning feature first, then original vs. sampled bill_length
data.table(
  island = original[["island"]],
  original = original[["bill_length"]],
  sampled = sampled[["bill_length"]]
)
## ----knn-sampler-numeric------------------------------------------------------
# kNN sampler using k = 5 neighbors
knn_numeric = ConditionalKNNSampler$new(task_numeric, k = 5)
# Sample x1 from nearest neighbors in the (x2, x3) space
sampled_numeric = knn_numeric$sample(
  feature = "x1", row_ids = seq_len(5),
  conditioning_set = c("x2", "x3")
)
# Conditioning features, then original vs. sampled x1
original_numeric = task_numeric$data(rows = seq_len(5))
data.table(
  x2 = original_numeric[["x2"]],
  x3 = original_numeric[["x3"]],
  original_x1 = original_numeric[["x1"]],
  sampled_x1 = sampled_numeric[["x1"]]
)
## ----knn-sampler-mixed--------------------------------------------------------
# Same kNN sampler, now on the task that has categorical features
knn_mixed = ConditionalKNNSampler$new(task_mixed, k = 5)
# Sample bill_length conditioning on island (categorical) and body_mass
# (numeric)
sampled_mixed = knn_mixed$sample(
  feature = "bill_length", row_ids = seq_len(5),
  conditioning_set = c("island", "body_mass")
)
# Conditioning features, then original vs. sampled bill_length
original_mixed = task_mixed$data(rows = seq_len(5))
data.table(
  island = original_mixed[["island"]],
  body_mass = original_mixed[["body_mass"]],
  original_bill = original_mixed[["bill_length"]],
  sampled_bill = sampled_mixed[["bill_length"]]
)
## ----knockoff-sampler---------------------------------------------------------
# Gaussian knockoff sampler, built on task_numeric created earlier
knockoff = KnockoffGaussianSampler$new(task_numeric)
# Untouched first five rows, then knockoffs for all features of those rows
original = task_numeric$data(rows = seq_len(5))
knockoffs = knockoff$sample(
  feature = task_numeric$feature_names, row_ids = seq_len(5)
)
# Original and knockoff values of x1 and x2 side by side
data.table(
  x1_original = original[["x1"]],
  x1_knockoff = knockoffs[["x1"]],
  x2_original = original[["x2"]],
  x2_knockoff = knockoffs[["x2"]]
)
## ----cfi-knockoff, eval = FALSE-----------------------------------------------
# # CFI with knockoff sampler for conditional independence testing
# cfi_knockoff = CFI$new(
# task = task_numeric,
# learner = lrn("regr.ranger"),
# measure = msr("regr.mse"),
# sampler = knockoff
# )
#
# # Compute importance with CPI-based inference
# cfi_knockoff$compute()
# cfi_knockoff$importance(ci_method = "cpi")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.