R/simulation.R

#' Fit Gaussian Random Regression Parameters to a Network Structure
#' Fits Gaussian regression CPDs to a network skeleton to random Gaussian data.
#' @param a object of the bn class
#' @return an object that inherits from bn.fit and bn.fit.gnet.
#' @export
sim_gbn <- function(net){
  .nnodes <- nnodes(net)
  # sim functions in gm sim would replace this.
  gbn <- matrix(rnorm(.nnodes^2), ncol = .nnodes) %>% # Sim some random data ...
    as.data.frame %>%
    setNames(nodes(net)) %>%
    {bn.fit(net, data = ., method = "mle")} # ... so I can initialize MLE parameters.
}

#' Simulate experimental data
#'
#' The bnlearn package uses the \emph{rbn} function to simulate data from a Bayesian network.
#' The following functions simulate a special set of datasets.
#'
#' The conditional probabilities in the Bayesian network may be such that some outcomes are
#' highly improbable.  When some variables rarely or never take on certain states in the data,
#' it can be difficult to infer network structure.
#'
#' \emph{fix_node} simulates data by fixing a single variable at one of its levels and simulating
#' from the resulting conditional probability distribution of the other variables given the fix.
#'
#' \emph{rbn_fixed} uses fix_node to simulate a full dataset by fixing each node at each of its
#' levels.  The resulting data does not match the underlying joint distribution because the
#' marginal distributions are changed.  However, the simulation will sample low probability outcomes,
#' making it easy to detect pairwise relationships between nodes.  In systems biology, this is
#' analogous to a set of broad perturbations that enable full measurement of the sample space.
#'
#' \emph{do_int} simulates data by applying a targeted intervention that fixes a single variable
#' at one of its levels, creating a mutilated network where the target is d-seperated from all of
#' its ancestors in the original network, i.e. Pearl's \emph{do-calculus}.
#'
#' \emph{rbn_ideal} uses do_int to simulate a full dataset by applying a targeted intervention
#' to each node, fixing the target at each of its levels.
#'
#' \emph{rbn_inhibition} uses do_int to simulate a full dataset by applying a targeted intervention
#' to each node, fixing the target at its lowest level.  The lowest level is assumed to be the first
#' level of the factor's levels.
#'
#' \emph{rbn_activation} uses do_int to simulate a full dataset by applying a targeted intervention
#' to each node, fixing the target at its highest level.  The highest level is assumed to be the first
#' level of the factor's levels.
#'
#' @param fitted_net a fitted Bayesian network, object of class bn.fit
#' @param the name of node to targeted for intervention
#' @param level the factor level of node where the intervention will fix
#' @param n the number of observations to simulate.
#' @return a data frame of simulated observations corresponding to the intervention.  If applicable,
#' the data frame contains a 'target' attribute containing a character vector, the ith element of
#' the vector is the intervention target in the ith row.
#' @export
fix_node <- function(fitted_net, node, level, n){
  ev_list <- structure(list(level), names = node)
  sampling <- cpdist(fitted_net, names(fitted_net), evidence = ev_list, method= "lw") %>%
    sample_n(n) %>%
    set_rownames(NULL)
  sampling
}
#' @rdname fix_node
#' @export
rbn_fixed <- function(fitted_net, n){
  sim <- lapply(fitted_net, function(item){
    dimnames(item$prob)[[1]] %>% # item_levels
    lapply(function(item_level) {
      fix_node(fitted_net, item$node, item_level, n) %>%
        mutate(target = as.character(item$node))
    }) %>%
      {do.call("rbind", .)} %>%
      set_rownames(NULL)
  }) %>%
    {do.call("rbind", .)} %>%
    set_rownames(NULL)
  attr(sim, "target") <- sim$target
  select(sim, -target)
}
#' @rdname fix_node
#' @export
do_int <- function(fitted_net, node, level, n){
  ev_list <- structure(list(level), names = node)
  mutilated_net <- mutilated(fitted_net, ev_list)
  rbn(mutilated_net, n)
}
#' @rdname fix_node
#' @export
rbn_ideal <- function(fitted_net, n, targets = names(fitted_net)){
  sim <- lapply(fitted_net[targets], function(item){
    dimnames(item$prob)[[1]] %>% # item_levels
      lapply(function(item_level) {
        do_int(fitted_net, item$node, item_level, n) %>%
        mutate(target = as.character(item$node))
      }) %>%
      {do.call("rbind", .)} %>%
      set_rownames(NULL)
    }) %>%
    {do.call("rbind", .)} %>%
    set_rownames(NULL)
  attr(sim, "target") <- sim$target
  select(sim, -target)
}
#' @rdname fix_node
#' @export
rbn_inhibition <- function(fitted_net, n,  targets = names(fitted_net)){
  sim <- lapply(fitted_net[targets], function(item){
    lowest_level <- dimnames(item$prob)[[1]][1]
    do_int(fitted_net, item$node, lowest_level, n) %>%
      mutate(target = as.character(item$node)) %>%
      set_rownames(NULL)
  }) %>%
  {do.call("rbind", .)} %>%
    set_rownames(NULL)
  attr(sim, "target") <- sim$target
  select(sim, -target)
}
#' @rdname fix_node
#' @export
rbn_activation <- function(fitted_net, n, targets = names(fitted_net)){
  sim <- lapply(fitted_net[targets], function(item){
    levels <- dimnames(item$prob)[[1]]
    lowest_level <- levels[length(levels)]
    do_int(fitted_net, item$node, lowest_level, n)  %>%
      mutate(target = as.character(item$node)) %>%
      set_rownames(NULL)
  }) %>%
    {do.call("rbind", .)} %>%
    set_rownames(NULL)
  attr(sim, "target") <- sim$target
  select(sim, -target)
}
robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.