title: "Active Learning on the T-Cell Network" author: "Robert Ness" date: "r Sys.Date()" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Vignette Title} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8}


library(bninfo)
library(parallel)

Ground truth network.

data("tcell_examples")
net <- tcell_examples$net_fit 
graphviz.plot(net, shape = "rectangle")

Candidates for inhibition

(candidates <- c("PIP2", "Akt", "PKC", "PKA", "Mek"))
(uninformative_presence <- 0.5 + 1 / (2 * (nnodes(net) - 1)))
sim_batch <- function(fitted_net, int_targets){
  obs <- rbn_fixed(net, n = 100) %>%
    lapply(`levels<-`, value = c("1", "2", "3")) %>%
    data.frame
  if(length(int_targets) == 1){
    ints <- lapply(names(obs), function(item) which(item == "derp")) %>%
      set_names(names(obs))
    out <- obs
    attr(out, "ints") <- ints
  } else {
    .sim <- rbn_inhibition(net, n = 200, int_targets[-1]) # first one in none
    int_array <- c(rep("obs", nrow(obs)), attr(.sim, "target"))
    ints <- lapply(names(.sim), function(item) which(item == int_array)) %>%
      set_names(names(.sim))
    out <- .sim %>% # Had to grab the 'target' attribute before this step.
      lapply(`levels<-`, value = c("1", "2", "3")) %>%
      data.frame %>%
      {rbind(obs, .)}
    attr(out, "ints") <- ints
  }
  out
}

Prior information regarding the network structure

In this example, I use PKC and PKA activation of the MAPK pathway.

mapk <- matrix(c("PKC", "Raf",
                 "PKA", "Raf",
                 "Raf", "Mek",
                 "Mek", "Erk"), ncol = 2, byrow = T) %>%
  set_colnames(c("from", "to"))
prior <- mapk %>%
  data.frame %>%
  mutate(prob = 1 - 1e-6)
graphviz.plot(construct_bn(mapk), shape = "rectangle")

Historic data

I start with data simulated from a fitted Bayes net model of T cell signaling. I simulate 3300 observations.

.data <- rbn_fixed(net, n = 100) %>%
  lapply(`levels<-`, value = c("1", "2", "3")) %>%
  data.frame

Naive case: passive selection with uninformative prior

R <- 1000
algo_args <- list(tabu = 50, score = "mbde", prior = "cs", iss = 1, maxp = 4)
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
cl <- makeCluster(16)
dags_uninformative <- boot_dags(.data, cluster = cl, R = R, 
                             m = nrow(.data), algorithm = "tabu",
                             algorithm.args = algo_args, 
                             random.graph.args = start_net_args)
save(dags_uninformative, file = "vignettes/objects/tcells/dags_uninformative.Rdata")
load(file = "objects/tcells/dags_uninformative.Rdata")

Information Gain and Stopping Criteria in the Naive Case.

In the following, I repeat the "passive learning" algorithm several times and look at the average of the results. The following passive learning algorithm incorporates the stopping criteria.

cl <- makeCluster(16)
pl_results_list <- NULL
for(i in 1:100){
  print(i)
  pl_results <- passive_learning(dags_uninformative, candidates = candidates, cluster = cl, debug = TRUE)
  pl_results_list <- c(pl_results_list, list(pl_results))
}
# Make array of results for each item
pl_results_list <- lapply(pl_results_list, function(item){
  item$p0 <- c(item$p0, rep(1, 5 - length(item$p0)))
  item$info_gains <- c(item$info_gains, rep(0, 5 - length(item$info_gains)))
  item
})
save(pl_results_list, file = "vignettes/objects/tcells/random_IG_P0.Rdata")
load(file = "objects/tcells/random_IG_P0.Rdata")
mean_info_gains <- lapply(pl_results_list, function(item) item$info_gains) %>% 
  {do.call("rbind", .)} %>% colMeans %>% round(3)
mean_info_gains
mean_p0 <- lapply(pl_results_list, function(item) item$p0) %>% 
  {do.call("rbind", .)} %>% colMeans %>% round(3)
mean_p0

Naive case performance

Next, I generate expected L1 and TPR performance statistics in the case of naive "passive" selection. We replicate the same combination of receptor stimulus as the original experiment

cl <- makeCluster(16)
l1_random_vals <- list() # L1 error
tpr_random_vals <- list() # True positive rates
thresh <- uninformative_presence# Edge detection threshold for calculating tpr
# Arguments for learning when no intervention is applied
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
for(j in 1:100){
  # Get a random ordering of interventions
  target_ordering <- c("none", sample(candidates))
  print(target_ordering)
  # Create arrays to hold stats after each experiment
  l1_random <- rep(NA, length(target_ordering))
  tpr_random <- rep(NA, length(target_ordering))
  for(i in 1:length(target_ordering)){
    # Simulate an experiment and combine it with historic data
    .sim <- sim_batch(model, target_ordering[1:i])
    ints <- attr(.sim, "ints")
    # Simulate learning of the DAG from the experiment.  
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, 
                      prior = "cs", maxp = 4, iss = 1)
    strength <- boot_dags(.sim, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim), cpdag = FALSE)
    # Collect the stats
    tpr_random[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
                      {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / 
                      nrow(filter(., label == 1))}
    l1_random[i] <- l1_error(strength, net)
  }
  message("L1: ", paste0(l1_random, collapse=", "))  
  message("TPR: ", paste0(round(tpr_random, 2), collapse=", "))  
  # Add to list
  tpr_random_vals[[j]] <- tpr_random
  l1_random_vals[[j]] <- l1_random
}
save(tpr_random_vals, file="vignettes/objects/tcells/random_tpr.Rdata")
save(l1_random_vals, file="vignettes/objects/tcells/random_l1.Rdata")
load(file="objects/tcells/random_tpr.Rdata")
load(file="objects/tcells/random_l1.Rdata")
(random_tpr <- do.call("rbind", tpr_random_vals) %>% colMeans %>% round(3)) 
(random_L1 <- do.call("rbind", l1_random_vals) %>% colMeans %>% round(3))

Active learning with uninformative edge-wise prior

cl <- makeCluster(4)
al_results_uninformative <- active_learning(dags_uninformative, candidates, cluster = cl, debug = TRUE)
save(al_results_uninformative, 
     file = "vignettes/objects/tcells/al_results_uninformative.Rdata")
load(file = "objects/tcells/al_results_uninformative.Rdata")

Acquire the TPR and L1 error for this ordering. I repeat the analysis a few times and take the average.

cl <- makeCluster(16)
target_ordering <- c("none", al_results_uninformative$selected)
l1_set_uninformative <- list() # L1 Error rates
tpr_set_uninformative <- list() # True positive rates
thresh <- uninformative_presence # Edge detection threshold for calculating tpr
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
for(j in 1:100){
  # Get a random ordering of interventions
  print(target_ordering)
  # Create arrays to hold stats after each experiment
  l1 <- rep(NA, length(target_ordering))
  tpr <- rep(NA, length(target_ordering))
  for(i in 1:length(target_ordering)){
    # Simulate an experiment and combine it with historic data
    .sim <- sim_batch(model, target_ordering[1:i])
    ints <- attr(.sim, "ints")
    # Simulate learning of the DAG from the experiment.  
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, 
                      prior = "cs", maxp = 4, iss = 1)
    strength <- boot_dags(.sim, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim), cpdag = FALSE)
    # Collect the stats
    tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
                      {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / 
                      nrow(filter(., label == 1))}
    l1[i] <- l1_error(strength, net)
  }
  message("L1: ", paste0(l1, collapse=", "))  
  message("TPR: ", paste0(round(tpr, 2), collapse=", "))  
  # Add to list
  tpr_set_uninformative[[j]] <- tpr
  l1_set_uninformative[[j]] <- l1
}
save(tpr_set_uninformative, 
     file = "vignettes/objects/tcells/tpr_uninformative.Rdata")
save(l1_set_uninformative, 
     file = "vignettes/objects/tcells/L1_uninformative.Rdata")
load(file = "objects/tcells/tpr_uninformative.Rdata")
load(file = "objects/tcells/L1_uninformative.Rdata")
colMeans(do.call("rbind", tpr_set_uninformative)) %>% round(3)
colMeans(do.call("rbind", l1_set_uninformative)) %>% round(3) 

Now, we get an optimal ordering with the canonical edges .

I use the canonical edges as a white list, limiting the DAG search only to cases where the white list is available.

Start by simulating some DAGs.

Used a custom algorithm for generating DAGs that works with a whitelist.

algo_args <- list(tabu = 50, score = "mbde", prior = "cs", whitelist = mapk)
start_net_args <- list(nodes = names(.data), whitelist = mapk)
cl <- makeCluster(16)
dags_informative <- boot_dags(.data, R = 1000, cluster = cl, m = nrow(.data), 
                              algorithm = "tabu",algorithm.args = algo_args,
                              random.graph.args = start_net_args)
save(dags_informative, file = "vignettes/objects/tcells/dags_informative.Rdata")
load(dags_informative, file = "objects/tcells/dags_informative.Rdata")
cl <- makeCluster(4)
al_results_informative <- active_learning(dags_informative, candidates, beta = prior, cluster = cl, debug = TRUE)
cl <- makeCluster(16)
target_ordering <- c("none", al_results_informative$selected)
l1_set_informative <- list() # L1 Error rates
tpr_set_informative <- list() # True positive rates
thresh <- uninformative_presence # Edge detection threshold for calculating tpr
start_net_args <- list(nodes = names(.data), method = "ic-dag", every = 100, 
                       max.in.degree = 3, max.out.degree = 3)
for(j in 1:100){
  # Get a random ordering of interventions
  print(target_ordering)
  # Create arrays to hold stats after each experiment
  l1 <- rep(NA, length(target_ordering))
  tpr <- rep(NA, length(target_ordering))
  for(i in 1:length(target_ordering)){
    # Simulate an experiment and combine it with historic data
    .sim <- sim_batch(model, target_ordering[1:i])
    ints <- attr(.sim, "ints")
    # Simulate learning of the DAG from the experiment.  
    algo_args <- list(tabu = 50, score = "mbde", exp = ints, 
                      prior = "cs", maxp = 4, iss = 1)
    strength <- boot_dags(.sim, cluster = cl, R = 200,
                         algorithm = "tabu", algorithm.args = algo_args,
                        random.graph.args = start_net_args) %>%
        custom.strength(names(.sim), cpdag = FALSE)
    # Collect the stats
    tpr[i] <- get_performance(strength, fit2net(net), consider_direction = TRUE) %>%
                      {nrow(filter(., label == 1, strength >= thresh, direction > .5)) / 
                      nrow(filter(., label == 1))}
    l1[i] <- l1_error(strength, net)
  }
  message("L1: ", paste0(l1, collapse=", "))  
  message("TPR: ", paste0(round(tpr, 2), collapse=", "))  
  # Add to list
  tpr_set_informative[[j]] <- tpr
  l1_set_informative[[j]] <- l1
}
save(tpr_set_informative, 
     file = "vignettes/objects/tcells/tpr_informative.Rdata")
save(l1_set_informative, 
     file = "vignettes/objects/tcells/L1_informative.Rdata")
load(file = "objects/tcells/tpr_informative.Rdata")
load(file = "objects/tcells/L1_informative.Rdata")
do.call("rbind", tpr_set_informative) %>% colMeans %>% round(3)
do.call("rbind", l1_set_informative) %>% colMeans %>% round(3) 


robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.