library(bninfo) library(beepr)
data(dream_net) graphviz.plot(net, shape = "rectangle") node_names <- nodes(fit2net(net))
According to Equation 6 in the manuscript, the presence probability an edge is present is slightly more the 0.5;
(uninformative_presence <- 0.5 + 1 / (2 * (nnodes(net) - 1)))
We have a priori causal knowledge in that receptor edges have a near 0 probability of having incoming edges. I create an edgewise-prior that reflects this knowledge.
receptors <- c("igf1", "il1a", "tgfa", "tnfa") prior <- lapply(receptors, function(receptor){ cbind(from = setdiff(nodes(net), receptors), to = receptor) }) %>% {do.call("rbind", .)} %>% as.data.frame %>% mutate(presence = uninformative_presence, orientation = 1e-06) sample_n(tbl_df(prior), 10)
Next, I simulate a historic dataset with 3300 observations.
.data <- rbn(net, 3300) tbl_df(.data)
First, we demonsrate the naive case of intervention selection. Here, we use simulated historic data, and an uninformative edgewise-prior. The only causal information that can be inferred in this case is uninformative v-structures.
We randomly select nodes to target for intervention. I simulate 3300 observations for the historic data. The constraints I make on the network search I keep consistent across all subsequent analyses.
We start by generating some DAGs from historic data and a uninformative prior. If I don't pass a prior argument to the learning algorithm, it will automatically construct an uninformative prior.
R <- 100 algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) cl <- makeCluster(4) dags <- boot_dags(.data, cluster = cl, R = R, m = nrow(.data), algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args)
In the following, I repeat the "passive learning" algorithm several times and look at the average of the results. The following passive learning algorithm incorporates the stopping criteria.
cl <- makeCluster(4) pl_results_list <- NULL for(i in 1:3){ print(i) pl_results <- passive_learning(dags, candidates = node_names, cluster = cl, debug = TRUE) pl_results_list <- c(pl_results_list, list(pl_results)) } # Make array of results for each item pl_results_list <- lapply(pl_results_list, function(item){ item$p0 <- c(item$p0, rep(1, 17 - length(item$p0))) item$info_gains <- c(item$info_gains, rep(0, 17 - length(item$info_gains))) item }) lapply(pl_results_list, function(item) item$info_gains) %>% {do.call("rbind", .)} %>% colMeans %>% round(3) lapply(pl_results_list, function(item) item$p0) %>% {do.call("rbind", .)} %>% colMeans %>% round(3)
Next, I generate expected L1 and TPR performance statistics in the case of naive "passive" selection.
cl <- makeCluster(4) l1_random_vals <- list() # L1 error tpr_random_vals <- list() # True positive rates thresh <- .5 # Edge detection threshold for calculating tpr # Arguments for learning when no intervention is applied no_intervention_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) for(j in 1:3){ # Get a starting L1 and TPR with just historic data, no interventions # Add a starting error with just observational data. strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = no_intervention_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr_random_start <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1_random_start <- l1_error(strength, net) # Get a random ordering of interventions target_ordering <- sample(node_names) print(target_ordering) # Create arrays to hold stats after each experiment l1_random <- rep(NA, length(target_ordering)) tpr_random <- rep(NA, length(target_ordering)) for(i in 1:length(l1_random)){ # Simulate an experiment and combine it with historic data .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i]) int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) .sim_data <- .sim %>% lapply(`levels<-`, value = c("0", "1")) %>% data.frame %>% {rbind(.data, .)} # Simulate learning of the DAG from the experiment. algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) # Collect the stats tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1_random[i] <- l1_error(strength, net) } # Add in the starting stats tpr_random <- c(tpr_random_start, tpr_random) l1_random <- c(l1_random_start, l1_random) message("L1: ", paste0(l1_random, collapse=", ")) message("TPR: ", paste0(round(tpr_random, 2), collapse=", ")) # Add to list tpr_random_vals[[j]] <- tpr_random l1_random_vals[[j]] <- l1_random } random_tpr <- do.call("rbind", tpr_random_vals) %>% colMeans random_L1 <- do.call("rbind", l1_random_vals) %>% colMeans
.data <- rbn(net, 3300) candidates <- names(.data) algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) cl <- makeCluster(4) dags <- boot_dags(.data, cluster = cl, R = 300, m = nrow(.data), algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) beep(2)
The following visualizes the distribution of unique equivalent classes amongst the sampled DAGs.
class_dist <- as.numeric(table(sapply(dags, v_string))) # Representation of posterior counts plot(1:length(class_dist), sort(class_dist, decreasing = T), xlab = "Class ID", ylab = "Counts", type = "l", main = "Equivalence Class Frequency", sub = paste("Out of", R, "Inferred DAGs"))
It seems every sampled DAG is from a unique class, probably because of the size of the space.
Immoral v-structures have a causal interpretation can can be learned from historic data alone. Let's look at the distribution of immoral v-structures across DAGs.
class_dist <- as.numeric(table(unlist(sapply(dags, v_array)))) plot(1:length(class_dist), sort(class_dist, decreasing = T), xlab = "Class ID", ylab = "Counts", type = "l", main = "Immoral V-Structure Frequency", sub = "Out of 500 Inferred DAGs")
The is a power-law on v-structures. This gives some confidence that the learning algorithm is consistently detecting some v-structures.
Now I apply the active learning algorithm.
cl <- makeCluster(4) al_results <- active_learning(dags, candidates, cluster = cl, debug = TRUE) save(al_results, file = "vignettes/objects/dream_300dags_uninformative.Rdata")
load(file = "objects/dream_300dags_uninformative.Rdata")
Next, I generate performance statistics on the active learning results. This works by simulating experiments in sequence according to the intervention ordering prescribed by the active learning algorithm. Each experiment in the sequence includes all the interventions in the ordering up to that point in the sequence. I then compare the learned network from simulated experimental data to the ground truth. The statistics I use L1 error and the TPR of edge detection.
I repeatedly run the analysis, collecting statistics each time. In the end I take the mean of the statistics. I stop when the mean stablizes.
algo_args <- list(tabu = 50, score = "bds", prior = "cs", iss = 1, maxp = 4) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) cl <- makeCluster(4) target_ordering <- al_results$selected l1_set <- list() # L1 Error rates tpr_set <- list() # True positive rates thresh <- .5 # Edge detection threshold for calculating tpr for(j in 1:100){ print(j) l1 <- rep(NA, length(target_ordering)) tpr <- rep(NA, length(target_ordering)) for(i in 1:length(target_ordering)){ print(target_ordering[1:i]) # Simulate inhibition data .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i]) # Combine with simulated historic data int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) .sim_data <- .sim %>% # Had to grab the 'target' attribute before this step. lapply(`levels<-`, value = c("0", "1")) %>% data.frame %>% {rbind(.data, .)} # Infer the posterior and sample DAGs, convert to strength object algo_args <- list(tabu = 50, score = "mbde", prior = "cs", exp = ints, maxp = 4, iss = 1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.data), cpdag = FALSE) tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1[i] <- l1_error(strength, net) } # Add a starting error with just observational data. algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.data, cluster = cl, R = 500, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>% {c(., tpr)} l1 <- l1_error(strength, net) %>% {c(., l1)} message("L1: ", paste0(l1, collapse=", ")) message("TPR: ", paste0(round(tpr, 2), collapse=", ")) tpr_set[[j]] <- tpr l1_set[[j]] <- l1 } beep(2) selected_tpr <- colMeans(do.call("rbind", tpr_set)) selected_L1 <- colMeans(do.call("rbind", l1_set)) save(selected_tpr, file = "vignettes/objects/selected_tpr_uninformative.Rdata") save(selected_L1, file = "vignettes/objects/selected_L1_uninformative.Rdata")
Comparing to average orderings.
# Simulate random orderings orderings <- combn(candidates, length(target_ordering)) %>% .[, sample(ncol(.), 100)] %>% apply(2, list) %>% lapply(unlist) %>% lapply(sample) cl <- makeCluster(16) l1_random_vals <- list() tpr_random_vals <- list() # True positive rates thresh <- .5 # Edge detection threshold for calculating tpr for(j in 1:length(orderings)){ print(orderings[[j]]) target_ordering <- orderings[[j]] l1_random <- rep(NA, length(target_ordering)) tpr_random <- rep(NA, length(target_ordering)) for(i in 1:length(l1_random)){ # Simulate an experiment and combine with historic data .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i]) int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) .sim_data <- .sim %>% lapply(`levels<-`, value = c("0", "1")) %>% data.frame %>% {rbind(.data, .)} # Simulate learning of the DAG from the experiment. algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1_random[i] <- l1_error(strength, net) } # Add a starting error whit just observational data. algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr_random <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>% {c(., tpr_random)} l1_random <- l1_error(strength, net) %>% {c(., l1_random)} message("L1: ", paste0(l1_random, collapse=", ")) message("TPR: ", paste0(round(tpr_random, 2), collapse=", ")) tpr_random_vals[[j]] <- tpr_random l1_random_vals[[j]] <- l1_random } names(tpr_random_vals) <- sapply(orderings, paste, collapse = "-") random_tpr <- do.call("rbind", tpr_random_vals) %>% colMeans names(l1_random_vals) <- sapply(orderings, paste, collapse = "-") random_L1 <- do.call("rbind", l1_random_vals) %>% colMeans
Generating a blacklist where receptors have no incoming edges.
receptors <- c("igf1", "il1a", "tgfa", "tnfa") receptor_bl <- lapply(receptors, function(receptor){ cbind(from = setdiff(nodes(net), receptors), to = receptor) }) %>% {do.call("rbind", .)}
algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1, blacklist = receptor_bl) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) cl <- makeCluster(16) dags2 <- boot_dags(.data, cluster = cl, R = 1000, m = nrow(.data), algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) chime()
cl <- makeCluster(16) al_results2 <- active_learning(dags2, candidates, cluster = cl, debug = TRUE) chime() #"il1a" "tnfa" "mek12" "tgfa" "igf1"
Evaluating performance for the blacklist case.
###### cl <- makeCluster(16) target_ordering <- al_results2$selected l1_set <- list() # L1 Error rates tpr_set <- list() # True positive rates thresh <- 0.5 # Edge detection threshold for calculating tpr for(j in 1:100){ print(j) l1 <- rep(NA, length(target_ordering)) tpr <- rep(NA, length(target_ordering)) for(i in 1:length(target_ordering)){ print(target_ordering[1:i]) .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i]) int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) .sim_data <- .sim %>% # Had to grab the 'target' attribute before this step. lapply(`levels<-`, value = c("0", "1")) %>% data.frame %>% {rbind(.data, .)} algo_args <- list(tabu = 50, score = "mbde", prior = "cs", exp = ints, blacklist = receptor_bl, maxp = 4, iss = 1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1[i] <- l1_error(strength, net) } # Add a starting error whit just observational data. algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1, blacklist = receptor_bl) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>% {c(., tpr)} l1 <- l1_error(strength, net) %>% {c(., l1)} message("L1: ", paste0(l1, collapse=", ")) message("TPR: ", paste0(round(tpr, 2), collapse=", ")) tpr_set[[j]] <- tpr l1_set[[j]] <- l1 } selected_tpr2 <- colMeans(do.call("rbind", tpr_set)) selected_L12 <- colMeans(do.call("rbind", l1_set))
Comparing to average orderings.
library(combinat) orderings <- combn(candidates, length(target_ordering)) %>% .[, sample(ncol(.), 100)] %>% apply(2, list) %>% lapply(unlist) %>% lapply(sample) # 30 of them is fine cl <- makeCluster(16) l1_random_vals <- list() tpr_random_vals <- list() # True positive rates thresh <- .5 # Edge detection threshold for calculating tpr for(j in 1:length(orderings)){ print(orderings[[j]]) target_ordering <- orderings[[j]] l1_random <- rep(NA, length(target_ordering)) tpr_random <- rep(NA, length(target_ordering)) for(i in 1:length(l1_random)){ .sim <- rbn_inhibition(net, n = 200, target_ordering[1:i]) int_array <- c(rep("obs", nrow(.data)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) .sim_data <- .sim %>% lapply(`levels<-`, value = c("0", "1")) %>% data.frame %>% {rbind(.data, .)} algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", blacklist = receptor_bl, maxp=4,iss=1) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} l1_random[i] <- l1_error(strength, net) } # Add a starting error whit just observational data. algo_args <- list(tabu = 50, score = "bds", prior = "cs", maxp = 4, iss = 1, blacklist = receptor_bl) strength <- boot_dags(.sim_data, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim_data), cpdag = FALSE) tpr_random <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, prediction >= thresh)) / nrow(filter(., label == 1))} %>% {c(., tpr_random)} l1_random <- l1_error(strength, net) %>% {c(., l1_random)} message("L1: ", paste0(l1_random, collapse=", ")) message("TPR: ", paste0(round(tpr_random, 2), collapse=", ")) tpr_random_vals[[j]] <- tpr_random l1_random_vals[[j]] <- l1_random } names(tpr_random_vals) <- sapply(orderings, paste, collapse = "-") random_tpr2 <- do.call("rbind", tpr_random_vals) %>% colMeans names(l1_random_vals) <- sapply(orderings, paste, collapse = "-") random_L12 <- do.call("rbind", l1_random_vals) %>% colMeans
Comparing no-blacklist inference to blacklist inference
plot(al_results$info_gains, type = "o", pch = "*") lines(al_results2$info_gains, lty=2, type = "o", pch = "*") plot(al_results$p0, type = "o", pch = "*") lines(al_results2$p0, lty=2, type = "o", pch = "*")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.