title: "Active Learning on the T-Cell Network"
author: "Robert Ness"
date: "r Sys.Date()
"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Vignette Title}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
library(bninfo) library(parallel)
data("tcell_examples") net <- tcell_examples$net_fit graphviz.plot(net, shape = "rectangle")
Candidates for inhibition
(candidates <- c("PIP2", "Akt", "PKC", "PKA", "Mek"))
(uninformative_presence <- 0.5 + 1 / (2 * (nnodes(net) - 1)))
sim_batch <- function(fitted_net, int_targets){ obs <- rbn_fixed(net, n = 100) %>% lapply(`levels<-`, value = c("1", "2", "3")) %>% data.frame if(length(int_targets) == 1){ ints <- lapply(names(obs), function(item) which(item == "derp")) %>% set_names(names(obs)) out <- obs attr(out, "ints") <- ints } else { .sim <- rbn_inhibition(net, n = 200, int_targets[-1]) # first one in none int_array <- c(rep("obs", nrow(obs)), attr(.sim, "target")) ints <- lapply(names(.sim), function(item) which(item == int_array)) %>% set_names(names(.sim)) out <- .sim %>% # Had to grab the 'target' attribute before this step. lapply(`levels<-`, value = c("1", "2", "3")) %>% data.frame %>% {rbind(obs, .)} attr(out, "ints") <- ints } out }
In this example, I use PKC and PKA activation of the MAPK pathway.
mapk <- matrix(c("PKC", "Raf", "PKA", "Raf", "Raf", "Mek", "Mek", "Erk"), ncol = 2, byrow = T) %>% set_colnames(c("from", "to")) prior <- mapk %>% data.frame %>% mutate(prob = 1 - 1e-6) graphviz.plot(construct_bn(mapk), shape = "rectangle")
I start with data simulated from a fitted Bayes net model of T cell signaling. I simulate 3300 observations.
.data <- rbn_fixed(net, n = 100) %>% lapply(`levels<-`, value = c("1", "2", "3")) %>% data.frame
R <- 1000 algo_args <- list(tabu = 50, score = "mbde", prior = "cs", iss = 1, maxp = 4) start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) cl <- makeCluster(16) dags_uninformative <- boot_dags(.data, cluster = cl, R = R, m = nrow(.data), algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args)
save(dags_uninformative, file = "vignettes/objects/tcells/dags_uninformative.Rdata")
load(file = "objects/tcells/dags_uninformative.Rdata")
In the following, I repeat the "passive learning" algorithm several times and look at the average of the results. The following passive learning algorithm incorporates the stopping criteria.
cl <- makeCluster(16) pl_results_list <- NULL for(i in 1:100){ print(i) pl_results <- passive_learning(dags_uninformative, candidates = candidates, cluster = cl, debug = TRUE) pl_results_list <- c(pl_results_list, list(pl_results)) } # Make array of results for each item pl_results_list <- lapply(pl_results_list, function(item){ item$p0 <- c(item$p0, rep(1, 5 - length(item$p0))) item$info_gains <- c(item$info_gains, rep(0, 5 - length(item$info_gains))) item })
save(pl_results_list, file = "vignettes/objects/tcells/random_IG_P0.Rdata")
load(file = "objects/tcells/random_IG_P0.Rdata")
mean_info_gains <- lapply(pl_results_list, function(item) item$info_gains) %>% {do.call("rbind", .)} %>% colMeans %>% round(3) mean_info_gains mean_p0 <- lapply(pl_results_list, function(item) item$p0) %>% {do.call("rbind", .)} %>% colMeans %>% round(3) mean_p0
Next, I generate expected L1 and TPR performance statistics in the case of naive "passive" selection. We replicate the same combination of receptor stimulus as the original experiment
cl <- makeCluster(16) l1_random_vals <- list() # L1 error tpr_random_vals <- list() # True positive rates thresh <- uninformative_presence# Edge detection threshold for calculating tpr # Arguments for learning when no intervention is applied start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) for(j in 1:100){ # Get a random ordering of interventions target_ordering <- c("none", sample(candidates)) print(target_ordering) # Create arrays to hold stats after each experiment l1_random <- rep(NA, length(target_ordering)) tpr_random <- rep(NA, length(target_ordering)) for(i in 1:length(target_ordering)){ # Simulate an experiment and combine it with historic data .sim <- sim_batch(model, target_ordering[1:i]) ints <- attr(.sim, "ints") # Simulate learning of the DAG from the experiment. algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim), cpdag = FALSE) # Collect the stats tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / nrow(filter(., label == 1))} l1_random[i] <- l1_error(strength, net) } message("L1: ", paste0(l1_random, collapse=", ")) message("TPR: ", paste0(round(tpr_random, 2), collapse=", ")) # Add to list tpr_random_vals[[j]] <- tpr_random l1_random_vals[[j]] <- l1_random }
save(tpr_random_vals, file="vignettes/objects/tcells/random_tpr.Rdata") save(l1_random_vals, file="vignettes/objects/tcells/random_l1.Rdata")
load(file="objects/tcells/random_tpr.Rdata") load(file="objects/tcells/random_l1.Rdata")
(random_tpr <- do.call("rbind", tpr_random_vals) %>% colMeans %>% round(3)) (random_L1 <- do.call("rbind", l1_random_vals) %>% colMeans %>% round(3))
cl <- makeCluster(4) al_results_uninformative <- active_learning(dags_uninformative, candidates, cluster = cl, debug = TRUE)
save(al_results_uninformative, file = "vignettes/objects/tcells/al_results_uninformative.Rdata")
load(file = "objects/tcells/al_results_uninformative.Rdata")
Acquire the TPR and L1 error for this ordering. I repeat the analysis a few times and take the average.
cl <- makeCluster(16) target_ordering <- c("none", al_results_uninformative$selected) l1_set_uninformative <- list() # L1 Error rates tpr_set_uninformative <- list() # True positive rates thresh <- uninformative_presence # Edge detection threshold for calculating tpr start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) for(j in 1:100){ # Get a random ordering of interventions print(target_ordering) # Create arrays to hold stats after each experiment l1 <- rep(NA, length(target_ordering)) tpr <- rep(NA, length(target_ordering)) for(i in 1:length(target_ordering)){ # Simulate an experiment and combine it with historic data .sim <- sim_batch(model, target_ordering[1:i]) ints <- attr(.sim, "ints") # Simulate learning of the DAG from the experiment. algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim), cpdag = FALSE) # Collect the stats tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / nrow(filter(., label == 1))} l1[i] <- l1_error(strength, net) } message("L1: ", paste0(l1, collapse=", ")) message("TPR: ", paste0(round(tpr, 2), collapse=", ")) # Add to list tpr_set_uninformative[[j]] <- tpr l1_set_uninformative[[j]] <- l1 }
save(tpr_set_uninformative, file = "vignettes/objects/tcells/tpr_uninformative.Rdata") save(l1_set_uninformative, file = "vignettes/objects/tcells/L1_uninformative.Rdata")
load(file = "objects/tcells/tpr_uninformative.Rdata") load(file = "objects/tcells/L1_uninformative.Rdata")
colMeans(do.call("rbind", tpr_set_uninformative)) %>% round(3) colMeans(do.call("rbind", l1_set_uninformative)) %>% round(3)
Now, we get an optimal ordering with the canonical edges .
I use the canonical edges as a white list, limiting the DAG search only to cases where the white list is available.
Start by simulating some DAGs.
Used a custom algorithm for generating DAGs that works with a whitelist.
algo_args <- list(tabu = 50, score = "mbde", prior = "cs", whitelist = mapk) start_net_args <- list(nodes = names(.data), whitelist = mapk) cl <- makeCluster(16) dags_informative <- boot_dags(.data, R = 1000, cluster = cl, m = nrow(.data), algorithm = "tabu",algorithm.args = algo_args, random.graph.args = start_net_args)
save(dags_informative, file = "vignettes/objects/tcells/dags_informative.Rdata")
load(dags_informative, file = "objects/tcells/dags_informative.Rdata")
cl <- makeCluster(4) al_results_informative <- active_learning(dags_informative, candidates, beta = prior, cluster = cl, debug = TRUE)
cl <- makeCluster(16) target_ordering <- c("none", al_results_informative$selected) l1_set_informative <- list() # L1 Error rates tpr_set_informative <- list() # True positive rates thresh <- uninformative_presence # Edge detection threshold for calculating tpr start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, max.in.degree = 3, max.out.degree = 3) for(j in 1:100){ # Get a random ordering of interventions print(target_ordering) # Create arrays to hold stats after each experiment l1 <- rep(NA, length(target_ordering)) tpr <- rep(NA, length(target_ordering)) for(i in 1:length(target_ordering)){ # Simulate an experiment and combine it with historic data .sim <- sim_batch(model, target_ordering[1:i]) ints <- attr(.sim, "ints") # Simulate learning of the DAG from the experiment. algo_args <- list(tabu = 50, score = "mbde", exp = ints, prior = "cs", maxp = 4, iss = 1) strength <- boot_dags(.sim, cluster = cl, R = 200, algorithm = "tabu", algorithm.args = algo_args, random.graph.args = start_net_args) %>% custom.strength(names(.sim), cpdag = FALSE) # Collect the stats tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>% {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / nrow(filter(., label == 1))} l1[i] <- l1_error(strength, net) } message("L1: ", paste0(l1, collapse=", ")) message("TPR: ", paste0(round(tpr, 2), collapse=", ")) # Add to list tpr_set_informative[[j]] <- tpr l1_set_informative[[j]] <- l1 }
save(tpr_set_informative, file = "vignettes/objects/tcells/tpr_informative.Rdata") save(l1_set_informative, file = "vignettes/objects/tcells/L1_informative.Rdata")
load(file = "objects/tcells/tpr_informative.Rdata") load(file = "objects/tcells/L1_informative.Rdata")
do.call("rbind", tpr_set_informative) %>% colMeans %>% round(3) do.call("rbind", l1_set_informative) %>% colMeans %>% round(3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.