library(dplyr)
library(bninfo)
library(parallel)
library(combinat)
library(stringr)
data(tcell_examples)
ground_truth <- tcell_examples$net
d_file <- system.file("extdata", "sachs.discretised.txt", package = "bninfo")
.data <- read.delim(d_file, header = TRUE,sep = " ")
cl <- makeCluster(4)

Construction of edge probabilities purely from historic data.

boot <- boot.strength(.data, algorithm = "tabu", 
                      R = 1000, m = nrow(.data),
                      cluster = cl,
              algorithm.args = list(score = "bde", 
                                    tabu = 50,
                                    prior = "uniform"), 
              cpdag = TRUE)

Now bring in a subset of canonical edges; PKC and PKA activation of the MAPK cascade:

prior_edges_mat <- matrix(c("PKC", "Raf", 
                            "PKA", "Raf",
                            "Raf", "Mek",
                            "Mek", "Erk"), 
                          dimnames = list(NULL, c("from", "to")), 
                          ncol = 2, byrow = T)
graphviz.plot(construct_bn(prior_edges_mat), shape = "rectangle")

Now incorporating these edges into the construction of edges from historic data:

boot_informed <- boot.strength(.data, algorithm = "tabu", 
                            cluster = cl, 
                            R = 1000, m = nrow(.data),
              algorithm.args = list(score = "bde", tabu = 50,
                                    whitelist = prior_edges_mat),
              cpdag = FALSE)

Visualize improvement in probability of edge presence:

prior_edges_df <- data.frame(prior_edges_mat, type = "prior") 
highlight <- boot_informed %>%
  transmute(
    from = from,
    to = to, 
    strength0 = as.numeric(boot$strength),
    direction0 = as.numeric(boot$direction),
    prob0 = strength0 * direction0,
    presence0 = edge_entropy(boot, presence_only = T),
    orientation0 = orientation_entropy(boot),
    strength1 = as.numeric(boot_informed$strength),
    direction1 = as.numeric(boot_informed$direction),
    prob1 = strength1 * direction1,
    presence1 = edge_entropy(boot_informed, presence_only = T),
    orientation1 = orientation_entropy(boot_informed),
    entropy_diff = orientation0 - orientation1,
    edge_name = paste0(from, "->", to)) %>%
  merge(name_edge_df(ground_truth), by = "edge_name") %>%
  mutate(from = from.x, to = to.x, 
         type = ifelse(edge_name %in% arcs2names(prior_edges_mat), 
                       "prior", "normal")) %>%
  select(from, to, type,
         strength0, direction0, strength1, direction1,
         presence0, orientation0, presence1, orientation1,
         entropy_diff) %>% 
#  filter(strength0 > .5, strength1 < .5) # Sanity check, this can happen, but for the sake of explanation hopefully it does not.
  transmute(from = from, to = to,
            col = ifelse(entropy_diff > .2, "blue", "black"),
            col = ifelse(type == "prior", "darkgrey", col),
            lwd = ifelse(type == "prior", 3, 1),
            lty = ifelse(strength1 > .5, "solid", "dashed")) %>%
  {list(arcs = as.matrix(.[, c("from", "to")]), 
       col = .$col, lwd = 2, lty = .$lty)}
graphviz.plot(ground_truth, highlight = highlight, shape = "rectangle")

Solid line edges had edge presence of more than .5, and blue edges had an increase of causal entropy of more the .2 bits.

Aside: constructing the highlight list for visualization:

comparison_table <- boot_informed %>%
  transmute(
    from = from,
    to = to, 
    strength0 = as.numeric(boot$strength),
    direction0 = as.numeric(boot$direction),
    prob0 = strength0 * direction0,
    presence0 = edge_entropy(boot, presence_only = T),
    orientation0 = orientation_entropy(boot),
    strength1 = as.numeric(boot_informed$strength),
    direction1 = as.numeric(boot_informed$direction),
    prob1 = strength1 * direction1,
    presence1 = edge_entropy(boot_informed, presence_only = T),
    orientation1 = orientation_entropy(boot_informed),
    entropy_diff = orientation0 - orientation1,
    edge_name = paste0(from, "->", to)) %>%
  mutate(test1 = from %in% c("Plcg", "PIP3", "PIP2"),
         test2 = to %in% c("Plcg", "PIP3", "PIP2"),
         test = test1 + test2 > 0) %>%
  filter(test) %>%
  select(from, to, strength0, strength1, direction0, direction1) %>%
  reduce_averaging

False edges with greater than 1/3 strength in the historic data results.

boot_informed$edge_name <- apply(boot_informed[, c("from", "to")], 1,
                                  function(r){
                                    paste0(sort(r), collapse = "--")
                                  })
boot$edge_name <- boot_informed$edge_name
true_positives <- apply(arcs(ground_truth), 1,
                                  function(r){
                                    paste0(sort(r), collapse = "--")
                                  })
false_arc_graph <- empty.graph(nodes(ground_truth))
false_arc_mat <- filter(boot_informed, 
                        !(edge_name %in% true_positives)) %>%
  select(from, to) %>%
  as.matrix
arcs(false_arc_graph, ignore.cycles = T) <- false_arc_mat
highlight_boot <- filter(boot, !(edge_name %in% true_positives)) %>%
  {list(arcs = as.matrix(select(., from, to)),
        lty = ifelse(.$strength >= 1/3, "solid", "dashed"),
        col = ifelse(.$strength >= 1/3, "red", "black"))}
graphviz.plot(false_arc_graph, highlight_boot, shape = "rectangle")

False edges with greater than 1/3 strength in the canonical edges + historic data results.

highlight_informed <- filter(boot_informed, 
                             !(edge_name %in% true_positives)) %>%
  {list(arcs = as.matrix(select(., from, to)),
        lty = ifelse(.$strength >= 1/3, "solid", "dashed"),
        col = ifelse(.$strength >= 1/3, "red", "black"))}
graphviz.plot(false_arc_graph, highlight_informed, shape = "rectangle" )

The difference is better illustrated with a heatmap:

require(graphics); require(grDevices)
a_mat <- amat(false_arc_graph)
mat_names <- rownames(a_mat)
for(i in 1:nrow(a_mat)){
  for(j in 1:ncol(a_mat)){
    i_name <- mat_names[i]
    j_name <- mat_names[j]
    if(a_mat[i_name, j_name] != 0){
      strength_change <- filter(boot_informed, 
           from == i_name,
           to == j_name)$strength -
        filter(boot, 
           from == i_name,
           to == j_name)$strength  
      a_mat[i_name, j_name] <- min(strength_change, 0)
      a_mat[j_name, i_name] <- a_mat[j_name, i_name]
    }
  }
}
heatmap(a_mat,  symm = T,
        col = heat.colors(256),
        main =NULL)

Editing this heatmap in a outside program produces the following more informative image:

heatmap

The non-grey edges are the non-validated edges. The scale from white to red reflects reduction in presence probability. Plcg edges with Raf, Akt, Jnk show a significant decline, illustrating how the introduction of canonical edges reduces false positives.



robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.