The 17-node network was used in the DREAM4 Predictive Signaling Network Challenge @prill2011crowdsourcing. It contains canonical pathways downstream of four receptors (dark grey nodes in Figure 3A): two inflammatory (TNFa, IL1a), one insulin (IGF-I), and one growth factor receptor (TGFa). The network provides a simple test case for inference of causal structure with bulk data (one measurement per sample). We use this network to evaluate the relative advantages of active learning, and the importance of the use of prior information.

The objective here is to load the network and data into formats compatible with the bninfo package, as well as create a causal network that can be used to simulate experiments.

library(CellNOptR)
library(CNORode)
library(dplyr)
library(bninfo)

First I load the DREAM4 model using the CellNOptR package @terfve2012cellnoptr.

data_file <- system.file("extdata/dream_data/", "MD-LiverDREAM.csv", 
                         package = "bninfo")
model_file <- system.file("extdata/dream_data/", "PKN-LiverDREAM.sif", 
                          package = "bninfo")
cnolist <- CNOlist(data_file)
model <- readSIF(model_file) # Need to expand possible and relationships.
mod_model <- indexFinder(cnolist, model) %>%
  {compressModel(model,.)}
plotModel(mod_model, cnolist)

There are two issues here that need solving. The first issue is that there are missing values.

.data <- getSignals(cnolist)[["30"]]  %>%
  data.frame %>%
  {cbind(getStimuli(cnolist), .)}
sample_n(tbl_df(.data), 15)

The second is that the white nodes in the above plot are proteins that are not quantified in the data.

First, I use an EM-ish approach to replace the missing values. I use a multi-layer perceptron to iteratively predict the missing values given the values of neighboring proteins.

library(neuralnet)
.data <- getSignals(cnolist)[["30"]]  %>%
  data.frame %>%
  {cbind(getStimuli(cnolist), .)}
p38_NAs <- is.na(.data$p38)
mek12_NAs <- is.na(.data$mek12)
.sim_data <- .data
fit <- neuralnet(mek12 ~ igf1+il1a+tgfa+tnfa+akt+erk12+ikb+jnk12+hsp27,
              data = na.omit(.sim_data[, -9]), hidden = 3, err.fct="sse", 
              linear.output =FALSE, likelihood = TRUE)
.sim_data$mek12[mek12_NAs] <- neuralnet::compute(fit, 
                                    .sim_data[mek12_NAs, -c(9, 11)]) %$%
  net.result %>%
  as.numeric
for(i in 1:10){
  p38_trainer <- .sim_data[!p38_NAs,]
  p38_predictor <- .sim_data[p38_NAs, -9]
  p38_new_fit <- neuralnet(p38 ~ igf1+il1a+tgfa+tnfa+akt+erk12+ikb+jnk12+hsp27+mek12,
                data = p38_trainer, hidden = 5, err.fct="sse", 
                linear.output =FALSE, likelihood = TRUE) 
  p38_new <-  neuralnet::compute(p38_new_fit, p38_predictor) %$%
    net.result %>%
    as.numeric
  #print(round(p38_new, 4))
  .sim_data$p38[p38_NAs] <- p38_new
  mek12_trainer <- .sim_data[!mek12_NAs, ]
  mek12_predictor <- .sim_data[mek12_NAs, -11]
  mek12_new_fit <- neuralnet(mek12 ~ igf1+il1a+tgfa+tnfa+akt+erk12+ikb+jnk12+hsp27+p38,
                data = mek12_trainer, hidden = 5, err.fct="sse", 
                linear.output =FALSE, likelihood = TRUE) 
  mek12_new <- neuralnet::compute(mek12_new_fit, mek12_predictor) %$%
    net.result %>%
    as.numeric
  #print(mek12_new)
  .sim_data$mek12[mek12_NAs] <- mek12_new
}  
cnolist2 <- cnolist
detach(package:neuralnet)
base_mat <- cnolist2@signals[["0"]]
base_mat[is.na(base_mat)] <- 0
cnolist2@signals[["0"]] <- base_mat
cnolist2@signals[["30"]] <- .sim_data %>% 
  select(akt, erk12, ikb, jnk12, p38, hsp27, mek12) %>%
  as.matrix

Next, I use the \emph{CNORode} package, part of the CellNOptR suite, to fit the missing nodes. This uses a Hill-kinetics based model to model the flow of signal through the pathway. After this, I coerce the data from continuous variables into a binary factor.

expanded_mod <- expandGates(mod_model, maxInputsPerGate=2)  #enable AND gates for better CPDs
ode_parameters <- expanded_mod %>%
  createLBodeContPars(LB_n = 1, LB_k = 0.1,
                      LB_tau = 0.01, UB_n = 5, UB_k = 0.9, 
                      UB_tau = 10, default_n = 3, default_k = 0.9, 
                      default_tau = 1, opt_n = TRUE, opt_k = TRUE,
                      opt_tau = TRUE, random = FALSE)
par(mar=c(1, 1, 1, 1))
model_sim <- getLBodeModelSim(cnolist2, expanded_mod, ode_parameters, 
                               timeSignals=seq(0,30,0.5))
inhib_mat <- getInhibitors(cnolist)
targets <- colnames(inhib_mat)
inhibs <- paste0("inh_", targets)
colnames(inhib_mat) <- inhibs
.data <- model_sim[[length(model_sim)]]  %>%
  apply(2, round) %>%
  data.frame %>%
  set_names(mod_model$namesSpecies) %>%
  select(-tgfa, -igf1, -tnfa, -il1a) %>%
  {cbind(getStimuli(cnolist), .)} %>%
  filter(!is.na(map3k7)) %>%
  lapply(factor) %>%
  data.frame 
tbl_df(.data)

The experiment has inhibitions on nodes ikk, mek12, pi3k, and p38. I convert these interventions to the list array format which can be passed to the learning algorithms in the \emph{bnlearn} package @scutari2009learning that \emph{bninfo} relies upon.

inhib_mat <- inhib_mat %>%
  set_colnames(c("ikk", "mek12", "pi3k", "p38")) %>%
  data.frame
dream_ints <- {setdiff(names(.data), names(inhib_mat))} %>%
  {structure(lapply(., function(item) integer(0)), names = .)} %>%
  c(., lapply(inhib_mat, function(item) which(item == 1))) %>%
  .[names(.data)]
dream_ints

Next, I convert the graph structure to \emph{bnlearn}'s "bn" class, which is what we use in \emph{bninfo}.

gt <- empty.graph(setdiff(names(.data), inhibs))
arcs(gt) <-  model2sif(mod_model)[, c(1, 3)] %>%
  set_colnames(c("from", "to"))
graphviz.plot(gt, shape = "rectangle")

I now fit a causal network on the DREAM data and network structure. This will allow us to simulate intervention experiments. To accomplish this, I model the interventions in the data as nodes themselves, and fit that network. Then, I use a particle-based MCMC approach to simulate data with no interventions, and use that to refit a network without intervention nodes.

gt_inh <- empty.graph(c(names(.data), inhibs))
arcs(gt_inh) <- arcs(gt) %>%
  rbind(cbind(from = inhibs, to = targets))
full_data <- inhib_mat %>%
  #lapply(factor) %>%
  #data.frame %>% 
  set_names(inhibs) %>%
  cbind(.data)
full_net <- bn.fit(gt_inh, data = full_data, method = "bayes")
sim_data <- cpdist(full_net, names(.data), (inh_ikk == "0") & (inh_mek12 == "0") & (inh_pi3k == "0") & (inh_p38 == "0"))  
net <- bn.fit(gt, data = sim_data, method = "bayes")

Finally, we save the DREAM model as a list.

dream <- list(data = .data, ints = dream_ints, net = gt, model = net)
devtools::use_data(dream, overwrite = TRUE)

References:



robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.