inst/extdata/scripts/no_cycle_processing.R

library(bninfo)
library(stringr)
library(parallel)
process <- function(raw){
  arcsinh <- function(x) log(x + sqrt(x^2 + 1))
  raw %>%
    setNames(str_replace_all(names(.), pattern = "X\\.", "")) %>%
    setNames(str_replace_all(names(.), pattern = "\\.", "")) %>%
    transmute(IGFRinh = IGFRinh,
              AKTinh = AKTinh,
              mTorinh = mTorinh,
              PI3Kinh = PI3Kinh,
              IGFR = IGFR_p / (IGF__IGFR +
                                 IGF__IGFR_p +
                                 IGF__IGFR_p__PTB1B +
                                 IGFR +
                                 IGFR__IGFRinh +
                                 IGFR_p +
                                 IGFR_p__PTB1B),
              IRS = IRS_p / (IRS +
                               IRS__IGF__IGFR_p +
                               IRS__IGFR_p +
                               IRS_p +
                               IRS_p__Grb2SOS +
                               IRS_p__Grb2SOS__RasGDP +
                               IRS_p__Grb2SOS__RasGTP +
                               IRS_p__IGF__IGFR_p +
                               IRS_p__IGFR_p +
                               IRS_p__PI3K +
                               IRS_p__PI3K__AKT +
                               IRS_p__PI3K__AKT__AKTinh +
                               IRS_p__PI3K__AKT_p +
                               IRS_p__PI3K__Grb2SOS +
                               IRS_p__PI3K__Grb2SOS__RasGDP +
                               IRS_p__PI3K__Grb2SOS__RasGTP +
                               IRS_p__PI3K__PI3Kinh),
              PI3K = IRS_p__PI3K / (ERK_p_p__IRS_p__PI3K__Grb2SOS +
                                      IRS_p__PI3K +
                                      IRS_p__PI3K__AKT +
                                      IRS_p__PI3K__AKT__AKTinh +
                                      IRS_p__PI3K__AKT_p +
                                      IRS_p__PI3K__Grb2SOS +
                                      IRS_p__PI3K__Grb2SOS__RasGDP +
                                      IRS_p__PI3K__Grb2SOS__RasGTP +
                                      IRS_p__PI3K__PI3Kinh +
                                      mTor_p__IRS_p__PI3K +
                                      mTor_p__IRS_p__PI3K__AKT +
                                      mTor_p__IRS_p__PI3K__AKT__AKTinh +
                                      mTor_p__IRS_p__PI3K__Grb2SOS +
                                      mTor_p__IRS_p__PI3K__Grb2SOS__RasGDP +
                                      mTor_p__IRS_p__PI3K__PI3Kinh +
                                      PI3K + PI3K__PI3Kinh +
                                      PI3Kinh),
              GRB = (IRS_p__Grb2SOS + IRS_p__PI3K__Grb2SOS) / (#ERK_p_p__IRS_p__Grb2SOS +
                #ERK_p_p__IRS_p__PI3K__Grb2SOS +
                Grb2SOS +
                  IRS_p__Grb2SOS +
                  IRS_p__PI3K__Grb2SOS +
                  IRS_p__Grb2SOS__RasGDP +
                  IRS_p__Grb2SOS__RasGTP +
                  IRS_p__PI3K__Grb2SOS__RasGDP +
                  IRS_p__PI3K__Grb2SOS__RasGTP +
                  mTor_p__IRS_p__Grb2SOS +
                  mTor_p__IRS_p__Grb2SOS__RasGDP +
                  mTor_p__IRS_p__PI3K__Grb2SOS +
                  mTor_p__IRS_p__PI3K__Grb2SOS__RasGDP),
              RAS = RasGTP / (AKT_p__RasGDP +
                                AKT_p__RasGTP +
                                IRS_p__Grb2SOS__RasGDP +
                                IRS_p__Grb2SOS__RasGTP +
                                IRS_p__PI3K__Grb2SOS__RasGDP +
                                IRS_p__PI3K__Grb2SOS__RasGTP +
                                mTor_p__IRS_p__Grb2SOS__RasGDP +
                                mTor_p__IRS_p__PI3K__Grb2SOS__RasGDP +
                                RasGDP +
                                RasGTP +
                                RasGTP__GTPase +
                                RasGTP__MEK +
                                RasGTP__MEK_p),
              MEK = MEK_p / (MEK + MEK_p +
                               MEK_p__ERK +
                               MEK_p__ERK_p +
                               P1__MEK_p +
                               RasGTP__MEK +
                               RasGTP__MEK_p),
              ERK = ERK_p_p /(ERK +
                                ERK_p +
                                ERK_p_p +
                                ERK_p_p__IRS_p__Grb2SOS +
                                ERK_p_p__IRS_p__PI3K__Grb2SOS +
                                MEK_p__ERK +
                                MEK_p__ERK_p +
                                MKP__ERK_p +
                                MKP__ERK_p_p),
              AKT = AKT_p / (AKT +
                               AKT__AKTinh +
                               AKT_p +
                               AKT_p__mTor +
                               AKT_p__RasGDP +
                               AKT_p__RasGTP +
                               IRS_p__PI3K__AKT +
                               IRS_p__PI3K__AKT__AKTinh +
                               IRS_p__PI3K__AKT_p +
                               mTor_p__IRS_p__PI3K__AKT +
                               mTor_p__IRS_p__PI3K__AKT__AKTinh +
                               Pase__AKT_p),
              MTOR = mTor_p / (AKT_p__mTor + mTor +
                                 mTor__mTorinh +
                                 mTor_p +
                                 mTor_p__IRS +
                                 mTor_p__IRS_p +
                                 mTor_p__IRS_p__Grb2SOS +
                                 mTor_p__IRS_p__Grb2SOS__RasGDP +
                                 mTor_p__IRS_p__PI3K +
                                 mTor_p__IRS_p__PI3K__AKT +
                                 mTor_p__IRS_p__PI3K__AKT__AKTinh +
                                 mTor_p__IRS_p__PI3K__Grb2SOS +
                                 mTor_p__IRS_p__PI3K__Grb2SOS__RasGDP +
                                 mTor_p__IRS_p__PI3K__PI3Kinh)) %>%
    mutate(IGFRinh = arcsinh(IGFRinh), AKTinh = arcsinh(AKTinh), mTorinh = arcsinh(mTorinh), PI3Kinh = arcsinh(PI3Kinh))
}
# Process the data
path_docs <- c("/Users/robertness/Dropbox/code/bninfo/inst/extdata/copasi_data/no_cycle_no_inh.txt",
               "/Users/robertness/Dropbox/code/bninfo/inst/extdata/copasi_data/no_cycle_with_inh.txt")
raw_dfs <- lapply(path_docs, read.delim, header = TRUE)
.data <- lapply(raw_dfs, process) %>%
  {do.call("rbind", .)} %>%
  select(-IGFRinh, -AKTinh, -mTorinh, -PI3Kinh) %>%
  sample_n(10000) %>%
  discretize(method = "hartemink", breaks = 3, ibreaks = 60)
for(i in 1:ncol(.data)){
  levels(.data[, i]) <-  c("low", "med", "high")
}
node_names <- c("IGFR", "IRS", "PI3K", "GRB", "RAS", "MEK", "ERK", "AKT", "MTOR")
truth_arcs <- matrix(
  c("IGFR", "IRS",
    "IRS", "PI3K",
    "PI3K", "AKT",
    "AKT", "MTOR",
    "AKT", "RAS",
    "IRS", "GRB",
    "PI3K", "GRB",
    "GRB", "RAS",
    "RAS", "MEK",
    "MEK", "ERK"), byrow =T, ncol = 2)
ground_truth <- empty.graph(node_names)
arcs(ground_truth) <- truth_arcs
graphviz.plot(ground_truth)
igfr_model <- bn.fit(ground_truth, .data, method = "bayes")


#### Trajectories
library(parallel)
library(combinat)
int_targets <- node_names
permutations <- permn(setdiff(int_targets, c("RAS", "MEK", "ERK")))
cbn <- NULL
for(i in 1:(length(int_targets) - 1)){
  cbn_list_names <- combn(int_targets, i) %>%
    apply(., 2, list) %>%
    lapply(unlist) %>%
    sapply(function(item) paste0(sort(item), collapse="-"))
  cbn_list <- as.list(rep(NA, length(cbn_list_names)))
  names(cbn_list) <- cbn_list_names
  cbn <- c(cbn, cbn_list)
}
cbn <- c(cbn, list(NA))
names(cbn)[length(cbn)] <- paste0(sort(int_targets), collapse = "-")
.new_ordering <- function(ordering, starting_boot, wl = NULL, bl = NULL){
  entropies <- mean(orientation_entropy(starting_boot))
  for(i in 1:length(ordering)){
    sub_order_name <- paste0(sort(ordering[1:i]), collapse = "-")
    print(sub_order_name)
    # To avoid repeats, only compute new entropies. This should take time
    prior_entropy <- cbn[[sub_order_name]]
    if(is.na(prior_entropy)){
      int_dex <- c(which(node_names %in% ordering[1:i]))
      int_data <- filter(idata, INT %in% int_dex) %>%
        rbind(mutate(sample_n(obs_data, nrow(obs_data), replace = TRUE), INT = NA)) # Add some of original data, resampled.
      INT <- lapply(1:9, function(x) {which(int_data$INT == x)})
      names(INT) <- node_names
      boot_i <- boot.strength(int_data[, 1:9], cl, algorithm = "tabu",
                              algorithm.args = list(score = "mbde",
                                                    iss = 50,
                                                    exp = INT,
                                                    whitelist = wl,
                                                    blacklist = bl,
                                                    tabu = 50), cpdag = FALSE)
      causal_entropy_i <- mean(orientation_entropy(boot_i))
      cbn[[sub_order_name]] <<- causal_entropy_i
    } else {
      causal_entropy_i <- prior_entropy
    }
    entropies <- c(entropies, causal_entropy_i)
  }
  names(entropies) <- c("obs", ordering)
  entropies
}

wl <- matrix(c("AKT", "RAS",
               "GRB", "RAS",
               "RAS", "MEK",
               "MEK", "ERK"),
             ncol = 2, byrow = T)
cl <- makeCluster(4)
obs_data <- ideal_observational_data(igfr_model, 1000)
boot_directed <- boot.strength(obs_data, algorithm = "tabu", cpdag = FALSE, cluster = cl, R = 500,
                               algorithm.args = list(score = "bde", iss = 50, whitelist = wl))
int_data <- ideal_intervention_data(igfr_model, 1000)
INT <- lapply(1:9, function(x) {which(int_data$INT == x)})
names(INT) <- names(int_data)[1:9]
int <- cbind("INT", nodes(ground_truth))
boot_result1 <- boot.strength(int_data[, 1:9], algorithm = "tabu", cpdag = FALSE, cluster = cl,
                             algorithm.args = list(score = "mbde", exp = INT, iss = 50,
                                                   whitelist = wl), R = 500)
boot_result2 <-  boot.strength(mutate(int_data, INT = factor(INT)), algorithm = "tabu", cpdag = FALSE, cluster = cl,
                             algorithm.args = list(score = "bde", iss = 50,
                                                   whitelist = rbind(wl, int)), R = 500) %>%
  filter(from != "INT") %>%
  filter(to != "INT")
x1 <- sort(reduce_averaging(boot_directed)$direction)
x2 <- sort(reduce_averaging(boot_result)$direction)

centropy <- Vectorize(function(x){
  if(x == 0) return(0)
  if(x == 1) return(0)
  (-log2(x) * x + -log2(1- x) * (1-x))
})




x1 <- boot_directed %>%
  reduce_averaging %>%
  filter(strength > .95) %>%
  mutate(edges = arcs2names(.))
x2 <- boot_result1 %>%
  reduce_averaging %>%
  mutate(edges = arcs2names(.)) %>%
  filter(edges %in% x1$edges)

### Trying with the Sachs data.
isachs <- read.table("/Users/robertness/Downloads/sachs.interventional.txt", header = T,
               colClasses = "factor")
INT <- sapply(1:11, function(x) {which(isachs$INT == x) })
nodes <- names(isachs)[1:11]
names(INT) <- nodes
start <- random.graph(nodes = nodes, method = "melancon", num = 500, burn.in = 10^5, every = 100)
netlist <- lapply(start, function(net) {
  tabu(isachs[, 1:11], score = "mbde", exp = INT,
       iss = 1, start = net, tabu = 50)
  })
intscore <- custom.strength(netlist, nodes = nodes, cpdag = FALSE)
########
averaging_entropies_directed <- lapply(permutations, .new_ordering,
                                       starting_boot = boot_directed, wl = wl)
save(averaging_entropies_directed,
     file = "inst/extdata/sim_results/entropy_trajectory_simulations/IGFR_model/IGFR_averaging_entropies_directed_3-31-16_no_bayes_50iss.Rdata")



net <- gs(.data, alpha = .01, undirected = TRUE)
performance_plot(net, moral(ground_truth), plot_truth = FALSE)
performance_plot(net, moral(ground_truth), plot_truth = TRUE)
net <- iamb(.data, undirected = TRUE)
performance_plot(net, moral(ground_truth), plot_truth = FALSE)
performance_plot(net, moral(ground_truth), plot_truth = TRUE)
net <- fast.iamb(.data, undirected = TRUE)
performance_plot(net, moral(ground_truth), plot_truth = FALSE)
performance_plot(net, moral(ground_truth), plot_truth = TRUE)
net <- inter.iamb(.data, undirected = TRUE)
performance_plot(net, moral(ground_truth), plot_truth = FALSE)
performance_plot(net, moral(ground_truth), plot_truth = TRUE)

cl <- makeCluster(4)

boot <- boot.strength(.data, cluster = cl, algorithm = "gs",
                algorithm.args = list(undirected = TRUE, alpha = .2))
strength_plot(boot, moral(ground_truth),directed_edges = FALSE,plot_truth = TRUE)
boot %>%
  reduce_averaging %>%
  select(from, to, strength)
################################################################
# Testing with a whitelist
################################################################
wl <- matrix(c("IRS", "PI3K",
               "PI3K", "IRS"), ncol = 2, byrow = T)
cl <- makeCluster(4)
boot <- boot.strength(.data, cluster = cl, algorithm = "gs",
                      algorithm.args = list(undirected = FALSE,
                                            whitelist = wl))
strength_plot(boot, moral(ground_truth),directed_edges = FALSE,plot_truth = TRUE)
boot %>%
  reduce_averaging %>%
  select(from, to, strength, direction)
robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.