In robertness/bninfo: Queries and information theoritic operations on Bayesian networks.

library(bninfo)
library(beepr)

data(dream_net)
graphviz.plot(net, shape = "rectangle")
node_names <- nodes(fit2net(net))

Preliminaries: Causal Edgewise-Prior and Historic Data

According to Equation 6 in the manuscript, the presence probability an edge is present is slightly more the 0.5;

(uninformative_presence <- 0.5 + 1 / (2 * (nnodes(net) - 1)))

We have a priori causal knowledge in that receptor edges have a near 0 probability of having incoming edges. I create an edgewise-prior that reflects this knowledge.

receptors <- c("igf1", "il1a", "tgfa", "tnfa")
prior <- lapply(receptors, 
             function(receptor){
               cbind(from = setdiff(nodes(net), receptors), to = receptor)
             }) %>%
  {do.call("rbind", .)} %>%
  as.data.frame %>%
  mutate(presence = uninformative_presence, 
         orientation = 1e-06)
sample_n(tbl_df(prior), 10)

Next, I simulate a historic dataset with 3300 observations.

.data <- rbn(net, 3300)
tbl_df(.data)

Naive Case: Picking interventions at random.

First, we demonsrate the naive case of intervention selection. Here, we use simulated historic data, and an uninformative edgewise-prior. The only causal information that can be inferred in this case is uninformative v-structures.

We randomly select nodes to target for intervention. I simulate 3300 observations for the historic data. The constraints I make on the network search I keep consistent across all subsequent analyses.
We start by generating some DAGs from historic data and a uninformative prior. If I don't pass a prior argument to the learning algorithm, it will automatically construct an uninformative prior.

R <- 100
algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
cl <- makeCluster(4)
dags <- boot_dags(.data, cluster = cl, R = R, m = nrow(.data), algorithm = "tabu",
                  algorithm.args = algo_args, random.graph.args = start_net_args)

Information Gain and Stopping Criteria in the Naive Case.

In the following, I repeat the "passive learning" algorithm several times and look at the average of the results. The following passive learning algorithm incorporates the stopping criteria.

cl <- makeCluster(4)
pl_results_list <- NULL
for(i in 1:3){
  print(i)
  pl_results <- passive_learning(dags, candidates = node_names, cluster = cl, debug = TRUE)
  pl_results_list <- c(pl_results_list, list(pl_results))
}
# Make array of results for each item
pl_results_list <- lapply(pl_results_list, function(item){
  item$p0 <- c(item$p0, rep(1, 17 - length(item$p0)))
  item$info_gains <- c(item$info_gains, rep(0, 17 - length(item$info_gains)))
  item
})
lapply(pl_results_list, function(item) item$info_gains) %>% {do.call("rbind", .)} %>% colMeans %>% round(3)
lapply(pl_results_list, function(item) item$p0) %>% {do.call("rbind", .)} %>% colMeans %>% round(3)

Next, I generate expected L1 and TPR performance statistics in the case of naive "passive" selection.

cl <- makeCluster(4)
l1_random_vals <- list() # L1 error
tpr_random_vals <- list() # True positive rates
thresh <- .5 # Edge detection threshold for calculating tpr
# Arguments for learning when no intervention is applied
no_intervention_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
for(j in 1:3){
  # Get a starting L1 and TPR with just historic data, no interventions
  # Add a starting error with just observational data.
  strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = no_intervention_args,
                        random.graph.args = start_net_args) %>%
      custom.strength(names(.sim_data), cpdag = FALSE)
  tpr_random_start <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
  l1_random_start <- l1_error(strength, net)
  # Get a random ordering of interventions
  target_ordering <- sample(node_names)
  print(target_ordering)
  # Create arrays to hold stats after each experiment
  l1_random <- rep(NA, length(target_ordering))
  tpr_random <- rep(NA, length(target_ordering))
  for(i in 1:length(l1_random)){
    # Simulate an experiment and combine it with historic data
    .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i])
    int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    .sim_data <- .sim %>%
      lapply(`levels<-`, value = c("0", "1")) %>%
      data.frame %>%
      {rbind(.data, .)}
    # Simulate learning of the DAG from the experiment.  
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1)
    strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim_data), cpdag = FALSE)
    # Collect the stats
    tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
    l1_random[i] <- l1_error(strength, net)
  }
  # Add in the starting stats
  tpr_random <- c(tpr_random_start, tpr_random)
  l1_random <- c(l1_random_start, l1_random)
  message("L1: ", paste0(l1_random, collapse=", "))  
  message("TPR: ", paste0(round(tpr_random, 2), collapse=", "))  
  # Add to list
  tpr_random_vals[[j]] <- tpr_random
  l1_random_vals[[j]] <- l1_random
}
random_tpr <- do.call("rbind", tpr_random_vals) %>% colMeans 
random_L1 <- do.call("rbind", l1_random_vals) %>% colMeans

Active learning with uninformative edge-wise prior

.data <- rbn(net, 3300)
candidates <- names(.data)
algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
cl <- makeCluster(4)
dags <- boot_dags(.data, cluster = cl, R = 300, m = nrow(.data), algorithm = "tabu",
                  algorithm.args = algo_args, random.graph.args = start_net_args)
beep(2)

Visualizing DAG distribution

The following visualizes the distribution of unique equivalent classes amongst the sampled DAGs.

class_dist <- as.numeric(table(sapply(dags, v_string))) # Representation of posterior counts
plot(1:length(class_dist), sort(class_dist, decreasing = T), xlab = "Class ID", ylab = "Counts",
     type = "l", main = "Equivalence Class Frequency", sub = paste("Out of", R, "Inferred DAGs"))

It seems every sampled DAG is from a unique class, probably because of the size of the space.

Immoral v-structures have a causal interpretation can can be learned from historic data alone. Let's look at the distribution of immoral v-structures across DAGs.

class_dist <- as.numeric(table(unlist(sapply(dags, v_array)))) 
plot(1:length(class_dist), sort(class_dist, decreasing = T), xlab = "Class ID", ylab = "Counts",
     type = "l", main = "Immoral V-Structure Frequency", sub = "Out of 500 Inferred DAGs")

The is a power-law on v-structures. This gives some confidence that the learning algorithm is consistently detecting some v-structures.

Active learning

Now I apply the active learning algorithm.

cl <- makeCluster(4)
al_results <- active_learning(dags, candidates, cluster = cl, debug = TRUE)
save(al_results, file = "vignettes/objects/dream_300dags_uninformative.Rdata")

load(file = "objects/dream_300dags_uninformative.Rdata")

Next, I generate performance statistics on the active learning results. This works by simulating experiments in sequence according to the intervention ordering prescribed by the active learning algorithm. Each experiment in the sequence includes all the interventions in the ordering up to that point in the sequence. I then compare the learned network from simulated experimental data to the ground truth. The statistics I use L1 error and the TPR of edge detection.

I repeatedly run the analysis, collecting statistics each time. In the end I take the mean of the statistics. I stop when the mean stablizes.

algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
cl <- makeCluster(4)
target_ordering <- al_results$selected
l1_set <- list() # L1 Error rates
tpr_set <- list() # True positive rates
thresh <- .5 # Edge detection threshold for calculating tpr
for(j in 1:100){
  print(j)
  l1 <- rep(NA, length(target_ordering))
  tpr <- rep(NA, length(target_ordering))
  for(i in 1:length(target_ordering)){
    print(target_ordering[1:i])
    # Simulate inhibition data
    .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i])
    # Combine with simulated historic data
    int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    .sim_data <- .sim %>% # Had to grab the 'target' attribute before this step.
      lapply(`levels<-`, value = c("0", "1")) %>%
      data.frame %>%
      {rbind(.data, .)}
    # Infer the posterior and sample DAGs, convert to strength object
    algo_args <- list(tabu = 50, score = "mbde", prior = "cs", exp = ints, maxp = 4, iss = 1)
    strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.data), cpdag = FALSE)
    tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
    l1[i] <- l1_error(strength, net)
  }
  # Add a starting error with just observational data.
  algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1)
  strength <- boot_dags(.data, cluster = cl, R = 500, 
                        algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
    custom.strength(names(.sim_data), cpdag = FALSE)
    tpr <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>%
      {c(., tpr)}
    l1 <- l1_error(strength, net) %>%
      {c(., l1)}
  message("L1: ", paste0(l1, collapse=", "))  
  message("TPR: ", paste0(round(tpr, 2), collapse=", "))  
  tpr_set[[j]] <- tpr
  l1_set[[j]] <- l1 
}
beep(2)
selected_tpr <- colMeans(do.call("rbind", tpr_set))
selected_L1 <-  colMeans(do.call("rbind", l1_set)) 
save(selected_tpr, file = "vignettes/objects/selected_tpr_uninformative.Rdata")
save(selected_L1, file = "vignettes/objects/selected_L1_uninformative.Rdata")

Comparing to average orderings.

# Simulate random orderings
orderings <- combn(candidates, length(target_ordering)) %>%
  .[, sample(ncol(.), 100)] %>%
  apply(2, list) %>%
  lapply(unlist) %>%
  lapply(sample)
cl <- makeCluster(16)
l1_random_vals <- list()
tpr_random_vals <- list() # True positive rates
thresh <- .5 # Edge detection threshold for calculating tpr
for(j in 1:length(orderings)){
  print(orderings[[j]])
  target_ordering <- orderings[[j]]
  l1_random <- rep(NA, length(target_ordering))
  tpr_random <- rep(NA, length(target_ordering))
  for(i in 1:length(l1_random)){
    # Simulate an experiment and combine with historic data
    .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i])
    int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    .sim_data <- .sim %>%
      lapply(`levels<-`, value = c("0", "1")) %>%
      data.frame %>%
      {rbind(.data, .)}
    # Simulate learning of the DAG from the experiment.  
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1)
    strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim_data), cpdag = FALSE)
    tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
    l1_random[i] <- l1_error(strength, net)
  }
  # Add a starting error whit just observational data.
  algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1)
  strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
    custom.strength(names(.sim_data), cpdag = FALSE)
  tpr_random <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>%
      {c(., tpr_random)}
  l1_random <- l1_error(strength, net) %>%
      {c(., l1_random)}
  message("L1: ", paste0(l1_random, collapse=", "))  
  message("TPR: ", paste0(round(tpr_random, 2), collapse=", "))  
  tpr_random_vals[[j]] <- tpr_random
  l1_random_vals[[j]] <- l1_random
}
names(tpr_random_vals) <- sapply(orderings, paste, collapse = "-")
random_tpr <- do.call("rbind", tpr_random_vals) %>%
  colMeans 
names(l1_random_vals) <- sapply(orderings, paste, collapse = "-")
random_L1 <- do.call("rbind", l1_random_vals) %>%
  colMeans

Now with the receptor blacklist.

Generating a blacklist where receptors have no incoming edges.

receptors <- c("igf1", "il1a", "tgfa", "tnfa")
receptor_bl <- lapply(receptors, 
             function(receptor){
               cbind(from = setdiff(nodes(net), receptors), to = receptor)
             }) %>%
  {do.call("rbind", .)}

Generating DAGs

algo_args <- list(tabu = 50, score = "bds", prior = "cs",
                  maxp = 4, iss = 1, blacklist = receptor_bl)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3)
cl <- makeCluster(16)
dags2 <- boot_dags(.data, cluster = cl, R = 1000, m = nrow(.data), algorithm = "tabu",
                  algorithm.args = algo_args, random.graph.args = start_net_args)
chime()

Running the active learning

cl <- makeCluster(16)
al_results2 <- active_learning(dags2, candidates, cluster = cl, debug = TRUE)
chime()
#"il1a"  "tnfa"  "mek12" "tgfa"  "igf1"

Evaluating performance for the blacklist case.

######
cl <- makeCluster(16)
target_ordering <- al_results2$selected
l1_set <- list() # L1 Error rates
tpr_set <- list() # True positive rates
thresh <-  0.5 # Edge detection threshold for calculating tpr
for(j in 1:100){
  print(j)
  l1 <- rep(NA, length(target_ordering))
  tpr <- rep(NA, length(target_ordering))
  for(i in 1:length(target_ordering)){
    print(target_ordering[1:i])
    .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i])
    int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    .sim_data <- .sim %>% # Had to grab the 'target' attribute before this step.
      lapply(`levels<-`, value = c("0", "1")) %>%
      data.frame %>%
      {rbind(.data, .)}
    algo_args <- list(tabu = 50, score = "mbde", prior = "cs", exp = ints, 
                      blacklist = receptor_bl, maxp = 4, iss = 1)
    strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim_data), cpdag = FALSE)
    tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
    l1[i] <- l1_error(strength, net)
  }
  # Add a starting error whit just observational data.
  algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1, 
                    blacklist = receptor_bl)
  strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
    custom.strength(names(.sim_data), cpdag = FALSE)
    tpr <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>%
      {c(., tpr)}
    l1 <- l1_error(strength, net) %>%
      {c(., l1)}
  message("L1: ", paste0(l1, collapse=", "))  
  message("TPR: ", paste0(round(tpr, 2), collapse=", "))  
  tpr_set[[j]] <- tpr
  l1_set[[j]] <- l1 
}
selected_tpr2 <- colMeans(do.call("rbind", tpr_set))
selected_L12 <-  colMeans(do.call("rbind", l1_set))

Comparing to average orderings.

library(combinat)
orderings <- combn(candidates, length(target_ordering)) %>%
  .[, sample(ncol(.), 100)] %>%
  apply(2, list) %>%
  lapply(unlist) %>%
  lapply(sample) 

# 30 of them is fine
cl <- makeCluster(16)
l1_random_vals <- list()
tpr_random_vals <- list() # True positive rates
thresh <- .5 # Edge detection threshold for calculating tpr
for(j in 1:length(orderings)){
  print(orderings[[j]])
  target_ordering <- orderings[[j]]
  l1_random <- rep(NA, length(target_ordering))
  tpr_random <- rep(NA, length(target_ordering))
  for(i in 1:length(l1_random)){
    .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i])
    int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    .sim_data <- .sim %>%
      lapply(`levels<-`, value = c("0", "1")) %>%
      data.frame %>%
      {rbind(.data, .)}
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", 
                      blacklist = receptor_bl, maxp=4,iss=1)
    strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim_data), cpdag = FALSE)
    tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))}
    l1_random[i] <- l1_error(strength, net)
  }
  # Add a starting error whit just observational data.
  algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1, 
                    blacklist = receptor_bl)
  strength <- boot_dags(.sim_data, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
    custom.strength(names(.sim_data), cpdag = FALSE)
  tpr_random <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
      {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>%
      {c(., tpr_random)}
  l1_random <- l1_error(strength, net) %>%
      {c(., l1_random)}
  message("L1: ", paste0(l1_random, collapse=", "))  
  message("TPR: ", paste0(round(tpr_random, 2), collapse=", "))  
  tpr_random_vals[[j]] <- tpr_random
  l1_random_vals[[j]] <- l1_random
}
names(tpr_random_vals) <- sapply(orderings, paste, collapse = "-")
random_tpr2 <- do.call("rbind", tpr_random_vals) %>%
  colMeans 
names(l1_random_vals) <- sapply(orderings, paste, collapse = "-")
random_L12 <- do.call("rbind", l1_random_vals) %>%
  colMeans

Comparing no-blacklist inference to blacklist inference

plot(al_results$info_gains, type = "o", pch = "*")
lines(al_results2$info_gains, lty=2, type = "o", pch = "*")
plot(al_results$p0, type = "o", pch = "*")
lines(al_results2$p0, lty=2, type = "o", pch = "*")