library(dplyr) library(bninfo) library(parallel) library(combinat) library(stringr)
data(tcell_examples) ground_truth <- tcell_examples$net d_file <- system.file("extdata", "sachs.discretised.txt", package = "bninfo") .data <- read.delim(d_file, header = TRUE,sep = " ") cl <- makeCluster(4)
Construction of edge probabilities purely from historic data.
boot <- boot.strength(.data, algorithm = "tabu", R = 1000, m = nrow(.data), cluster = cl, algorithm.args = list(score = "bde", tabu = 50, prior = "uniform"), cpdag = TRUE)
Now bring in a subset of canonical edges; PKC and PKA activation of the MAPK cascade:
prior_edges_mat <- matrix(c("PKC", "Raf", "PKA", "Raf", "Raf", "Mek", "Mek", "Erk"), dimnames = list(NULL, c("from", "to")), ncol = 2, byrow = T) graphviz.plot(construct_bn(prior_edges_mat), shape = "rectangle")
Now incorporating these edges into the construction of edges from historic data:
boot_informed <- boot.strength(.data, algorithm = "tabu", cluster = cl, R = 1000, m = nrow(.data), algorithm.args = list(score = "bde", tabu = 50, whitelist = prior_edges_mat), cpdag = FALSE)
Visualize improvement in probability of edge presence:
prior_edges_df <- data.frame(prior_edges_mat, type = "prior") highlight <- boot_informed %>% transmute( from = from, to = to, strength0 = as.numeric(boot$strength), direction0 = as.numeric(boot$direction), prob0 = strength0 * direction0, presence0 = edge_entropy(boot, presence_only = T), orientation0 = orientation_entropy(boot), strength1 = as.numeric(boot_informed$strength), direction1 = as.numeric(boot_informed$direction), prob1 = strength1 * direction1, presence1 = edge_entropy(boot_informed, presence_only = T), orientation1 = orientation_entropy(boot_informed), entropy_diff = orientation0 - orientation1, edge_name = paste0(from, "->", to)) %>% merge(name_edge_df(ground_truth), by = "edge_name") %>% mutate(from = from.x, to = to.x, type = ifelse(edge_name %in% arcs2names(prior_edges_mat), "prior", "normal")) %>% select(from, to, type, strength0, direction0, strength1, direction1, presence0, orientation0, presence1, orientation1, entropy_diff) %>% # filter(strength0 > .5, strength1 < .5) # Sanity check, this can happen, but for the sake of explanation hopefully it does not. transmute(from = from, to = to, col = ifelse(entropy_diff > .2, "blue", "black"), col = ifelse(type == "prior", "darkgrey", col), lwd = ifelse(type == "prior", 3, 1), lty = ifelse(strength1 > .5, "solid", "dashed")) %>% {list(arcs = as.matrix(.[, c("from", "to")]), col = .$col, lwd = 2, lty = .$lty)} graphviz.plot(ground_truth, highlight = highlight, shape = "rectangle")
Solid line edges had edge presence of more than .5, and blue edges had an increase of causal entropy of more the .2 bits.
Aside: constructing the highlight list for visualization:
comparison_table <- boot_informed %>% transmute( from = from, to = to, strength0 = as.numeric(boot$strength), direction0 = as.numeric(boot$direction), prob0 = strength0 * direction0, presence0 = edge_entropy(boot, presence_only = T), orientation0 = orientation_entropy(boot), strength1 = as.numeric(boot_informed$strength), direction1 = as.numeric(boot_informed$direction), prob1 = strength1 * direction1, presence1 = edge_entropy(boot_informed, presence_only = T), orientation1 = orientation_entropy(boot_informed), entropy_diff = orientation0 - orientation1, edge_name = paste0(from, "->", to)) %>% mutate(test1 = from %in% c("Plcg", "PIP3", "PIP2"), test2 = to %in% c("Plcg", "PIP3", "PIP2"), test = test1 + test2 > 0) %>% filter(test) %>% select(from, to, strength0, strength1, direction0, direction1) %>% reduce_averaging
False edges with greater than 1/3 strength in the historic data results.
boot_informed$edge_name <- apply(boot_informed[, c("from", "to")], 1, function(r){ paste0(sort(r), collapse = "--") }) boot$edge_name <- boot_informed$edge_name true_positives <- apply(arcs(ground_truth), 1, function(r){ paste0(sort(r), collapse = "--") }) false_arc_graph <- empty.graph(nodes(ground_truth)) false_arc_mat <- filter(boot_informed, !(edge_name %in% true_positives)) %>% select(from, to) %>% as.matrix arcs(false_arc_graph, ignore.cycles = T) <- false_arc_mat highlight_boot <- filter(boot, !(edge_name %in% true_positives)) %>% {list(arcs = as.matrix(select(., from, to)), lty = ifelse(.$strength >= 1/3, "solid", "dashed"), col = ifelse(.$strength >= 1/3, "red", "black"))} graphviz.plot(false_arc_graph, highlight_boot, shape = "rectangle")
False edges with greater than 1/3 strength in the canonical edges + historic data results.
highlight_informed <- filter(boot_informed, !(edge_name %in% true_positives)) %>% {list(arcs = as.matrix(select(., from, to)), lty = ifelse(.$strength >= 1/3, "solid", "dashed"), col = ifelse(.$strength >= 1/3, "red", "black"))} graphviz.plot(false_arc_graph, highlight_informed, shape = "rectangle" )
The difference is better illustrated with a heatmap:
require(graphics); require(grDevices) a_mat <- amat(false_arc_graph) mat_names <- rownames(a_mat) for(i in 1:nrow(a_mat)){ for(j in 1:ncol(a_mat)){ i_name <- mat_names[i] j_name <- mat_names[j] if(a_mat[i_name, j_name] != 0){ strength_change <- filter(boot_informed, from == i_name, to == j_name)$strength - filter(boot, from == i_name, to == j_name)$strength a_mat[i_name, j_name] <- min(strength_change, 0) a_mat[j_name, i_name] <- a_mat[j_name, i_name] } } } heatmap(a_mat, symm = T, col = heat.colors(256), main =NULL)
Editing this heatmap in a outside program produces the following more informative image:
The non-grey edges are the non-validated edges. The scale from white to red reflects reduction in presence probability. Plcg edges with Raf, Akt, Jnk show a significant decline, illustrating how the introduction of canonical edges reduces false positives.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.