#' Scramble Labels in a Bayesian Network
#'
#' Scrambles the labels in random networks. Intended to address any node ordering bias present in
#' random network generation algorithms
#' @param g a Bayesian network structure of class bn.struct
#' @export
scramble <- function(g){
am <- amat(g)
scramble <- sample(nodes(g))
colnames(am) <- scramble
rownames(am) <- scramble
h <- empty.graph(scramble)
amat(h) <- am
h
}
#' Confirm a list of edges forms a DAG.
test_dag <- function(arc_list, debug = debug){
if(debug) message("testing DAG constraint")
isdag <- arc_list %>%
as.data.frame %>%
unique %>%
graph.data.frame %>%
is.dag
if(!isdag && debug) message("failed DAG test")
isdag
}
#' Convert bn.fit to bn
#' @export
fit2net <- function(netfit){
net <- empty.graph(nodes(netfit))
arcs(net) <- arcs(netfit)
net
}
#' Covert Dataframe to a Model Averaging Object
#'
#' Converts a data frame to an object of bn.strength, the class
#' produced by model averaging in \emph{bn.learn}. The columns of the
#' typical bn.strength object are "from", "to", "strength", and "direction".
#' However this function does not check for presence of exclusivity of those items, it
#' assumes you know what you are doing.
#'
#' @param a data frame
#' @param threshold desired value for threshold attribute
#' @param mode desired value for mode attribute
#' @export
rebuild_strength <- function(df, threshold, mode){
if(!("data.frame" %in% class(df))) stop("df must be a data frame.")
attr(df, "threshold") <- threshold
attr(df, "mode") <- mode
class(df) <- c("data.frame", "bn.strength")
df
}
#' Convert an edge matrix to array of edge names
#'
#' Takes a matrix or data frame of edges, with from nodes in the first column, to nodes in the second.
#' Returns an array of edge names in the form of 'A->B' or 'A--B' for directed and undirected
#' edges respectfully.
#'
#' This is useful when you need a way of referencing edges, and you can easily infer the name by
#' which nodes are in the edge. For example, when combine two data frames with different annotations
#' on the same set of edges, you can create a new 'edge_name' variable in both data frames, and merge the
#' two objects, using that variable in 'by' argument for the 'merge' function.
#' @export
#' @seealso \code{\link{name_edge_df}}, \code{\link{name_edges}}
arcs2names <- function(edges, directed_edges = TRUE, return_unique = FALSE){
if(directed_edges){
return(apply(edges[, 1:2, drop = F], 1, function(edge) paste(edge, collapse = "->")))
}
apply(edges[, 1:2, drop = F], 1, function(edge) paste(sort(edge), collapse = "--"))
}
#' Return array of edge names
#'
#' Given a network structure of class \emph{bn} returns an array of all the edge names.
#' Works with both undirected, directed, and partially directed graphs.
#' @param net object of class \emph{bn}
#' @return a vector of edge names of the form "A->B" or "A--B" depending on whether or not the edge
#' is directed or not.
#' @seealso \code{\link{name_edge_df}}, \code{\link{arcs2names}}
#' @export
name_edges <- function(net){
directed_df <- data.frame(directed.arcs(net), stringsAsFactors = F) %>%
mutate(edge_names = arcs2names(., directed_edges = TRUE),
directed = TRUE)
undirected_df <- data.frame(undirected.arcs(net), stringsAsFactors = F) %>%
mutate(edge_names = arcs2names(., directed_edges = FALSE),
directed = FALSE)
unique(c(directed_df$edge_names, undirected_df$edge_names))
}
#' Convert arc matrix to a data frame with named edges
#'
#' Takes a network, and returns a dataframe containing its edges. The first to
#' columns are from and to nodes, the third column is the name of the edge, i.e. a
#' string in the form "A->B" or "A--B", depending on whether or not the edge is
#' directed. The first two columns correspond to the output of \emph{arcs},
#' \emph{undirected.arcs}, and \emph{directed.arcs}. In the case of an undirected arc,
#' these functions will produce two rows in their output matrix, eg A--B has two rows
#' "A, B", and "B, A". This function treats undirected arcs the same way.
#' @param net object of class \emph{bn}
#' @return a dataframe corresponding to a edge list, where the third column is an edge
#' attribute corresponding to a character name of the edge.
#' @export
#' @seealso \code{\link{name_edges}}, \code{\link{arcs2names}}
name_edge_df <- function(net){
rbind(
{ directed.arcs(net) %>%
data.frame(stringsAsFactors=FALSE) %>%
mutate(edge_name = arcs2names(., directed_edges = TRUE))
},
{ undirected.arcs(net) %>%
data.frame(stringsAsFactors=FALSE) %>%
mutate(edge_name = arcs2names(., directed_edges = FALSE))
}
)
}
#' DAG Simulation with Preferential Attachment
#'
#' Simulates a DAG using preferential attachment, and accepts a set of whitelisted
#' edges.
#' @param node_names The names of the nodes in the network.
#' @param whitelist A white list of edges, as a matrix of two columns.
#' The preferential attachment algorithm uses these edges as starting edges.
#' @export
ba_whitelist <- function(nodes, whitelist, ...){
start <- igraph::graph_from_edgelist(whitelist, directed = TRUE)
g <- igraph::sample_pa(length(nodes), start.graph = start, ...)
igraph::V(g)[1:igraph::vcount(start)]$name <- igraph::V(start)$name
igraph::V(g)[is.na(igraph::V(g)$name)]$name <- setdiff(nodes,igraph::V(start)$name)
net <- bninfo::construct_bn(igraph::as_edgelist(g))
net$learning$algo <- "preferential attachment"
net
}
#' Generate random graphs with a white list of edges
#'
#' The bnlearn package has a random graph generator in the 'random.graph' function.
#' These random graphs are used as starting graphs in structure inference. However,
#' if there is a set of white listed edges, some of those edges may clash with random
#' edges in the random graph. For example, the whitelisted edges and random edges might
#' introduce cycles. This function is a wrapper for bn.learn's random.graph function
#' that allows for a whitelist. It uses the same arguments as random.graph, except for the
#' whitelist argument, which is the same as the whitelist argument used in bnlearn's structure
#' inference methods. Ideally, the random.graph function would take a whitelist
#' argument and add random edges on a starter graph that included those whitelisted edges.
#' This presents a less elegant work-around, where random edges are added to nodes that are
#' not in the whitelist. If all the nodes are in the whitelist, then simply a graph based on the
#' whitelist is return.
#' @param nodes a vector of character strings, the labels of the nodes.
#' @param a matrix with two columns (optionally labeled "from" and "to"), containing a set of arcs to be included in the graph.
#' @param num an integer, the number of graphs to be generated.
#' @param ... additional arguments passed to either the preferential attachment algorithm (if there is a whitelist) or the random.graph algorithm.
#' @export
random_graph <- function(nodes, whitelist = NULL, num = 1, ..., debug = FALSE){
extra_args <- list(...)
if(is.null(whitelist)){
random.graph.args <- c(list(nodes = nodes, num = num), extra_args)
start_nets <- do.call("random.graph", random.graph.args)
if(num == 1) start_nets <- list(start_nets)
}else{
start_nets <- lapply(1:num, function(i) {
ba_whitelist_args <- c(list(nodes = nodes, whitelist = whitelist), extra_args)
do.call("ba_whitelist", ba_whitelist_args)
})
}
start_nets
}
#' Apply Structure Inference from a Starting Net
#'
#' Applies a structure inference algorithm given a starting net.
#' @param net A starting net
#' @param algorithm character string for the structure inference algorithm to be applied.
#' @param algorithm.args named list of the arguments to be passed to the algorithm.
#' @param m a positive integer, the size of each bootstrap replicate.
#' @param int_array provided only if using the "mbde" score for causal inference, provide a character array where each element corresponds to a row in the dataframe, and whose value is the name of the feature that was subject to intervention in that row.
#' @export
infer_from_start_net <- function(net, algorithm, algorithm.args, m = NULL, int_array = NULL){
require(bnlearn)
if(is.null(m)) m <- nrow(algorithm.args$x)
algorithm.args$start <- net
sample_dex <- sample(nrow(algorithm.args$x), size = m, replace = TRUE)
if(algorithm.args$score == "mbde"){
sim_int <- int_array[sample_dex]
exp <- lapply(names(algorithm.args$x), function(item){
which(sim_int == item)
})
names(exp) <- names(algorithm.args$x)
algorithm.args$exp <- exp
}
algorithm.args$x <- algorithm.args$x[sample_dex, ]
do.call(algorithm, algorithm.args)
}
#' Infer Multiple Structures with Different Starting Graphs
#'
#' Infers multiple structures from different starting graphs. In each iteration
#' the data is resampled. If a whitelist is included in the inference algorithm
#' arguments, random graphs are generated from the whitelist using preferential
#' attachment. Currently not implemented for blacklists.
#' @param data the data set targeted for inference.
#' @param cluster an optional cluster object from package parallel.
#' @param R a positive integer, the number of bootstrap replicates.
#' @param m a positive integer, the size of each bootstrap replicate.
#' @param algorithm a character string, the learning algorithm to be applied to the bootstrap replicates. Possible values are gs, iamb, fast.iamb, inter.iamb, mmpc, hc, tabu, mmhc and rsmax2. See bnlearn-package and the documentation of each algorithm for details.
#' @param algorithm.args a list of extra arguments to be passed to the learning algorithm.
#' @param random.graph.args arguments to be passed in random graph generation. If a whitelist
#' is provided in algorithm.args, the arguments are used in preferential attachment. Otherwise
#' they are passed to bnlearn's random.graph generation.
#' @export
boot_dags <- function(x, cluster = NULL, R = 200, m = nrow(data), algorithm,
algorithm.args = list(), random.graph.args = list()){
algorithm.args$x <- x
random.graph.args$nodes <- names(x)
random.graph.args$num <- R
if("whitelist" %in% names(algorithm.args)){
random.graph.args$whitelist <- algorithm.args$whitelist
}
nets <- do.call("random_graph", random.graph.args)
int_array <- NULL
if("exp" %in% names(algorithm.args)){
INT <- algorithm.args$exp
int_array <- rep(NA, nrow(x))
for(l in 1:length(INT)){
int_array[INT[[l]]] <- names(INT)[l]
}
int_array[is.na(int_array)] <- "obs"
}
if(is.null(cluster)){
result <- lapply(nets, infer_from_start_net, algorithm = algorithm, algorithm.args = algorithm.args, m = m, int_array = int_array)
}else{
result <- parLapply(cl = cluster, nets, infer_from_start_net, algorithm = algorithm, algorithm.args = algorithm.args, m = m, int_array = int_array)
}
result
}
#' Construct Bayesian network structure directly from arc matrix
#'
#' @param arc_mat a matrix of arcs (edge list), from nodes in the first column, to nodes in the
#' second column.
#' @return an object of bn.fit
#' @export
construct_bn <- function(arc_mat){
node_names <- unique(c(arc_mat[, 1], arc_mat[, 2]))
net <- bnlearn::empty.graph(node_names)
bnlearn::`arcs<-`(net, value = arc_mat)
}
#' Break cycles to produce a DAG
#' Given a set of arcs of a directed graph, if the arcs contain a cycle,
#' break the cycle by reversing low probability arcs.
#' @param arc_list a list of arcs/edges from a directed graph structure
#' @param white_list a list of arcs/edges that are not to be broken.
#' @param probabilities associated with the arcs. The algorithm will prioritize
#' low probability edges for reversal.
#' @export
break_cycles <- function(arc_list, probabilities = NULL){
if(any(probabilities == 0)) stop("Don't include 0 probability edges.")
if(is.null(probabilities)) probabilities <- rep(.5, nrow(arc_list))
g <- arc_list[, c(1, 2)] %>%
as.data.frame %>%
mutate(prob = probabilities) %>%
graph.data.frame(directed = TRUE)
if(is_dag(g)) return(arc_list)
E(g)$weight <- E(g)$prob ^ -1
g_init <- g
reversed_edges <- NULL
untouchable_edges <- E(g)[prob == 1]
repeat{
path <- detect_cycle(g)
if(length(path) == 0) {
break
}
candidates_for_reversal <- E(g)[setdiff(path, c(reversed_edges, untouchable_edges))]
if(length(candidates_for_reversal) == 0) stop("There are no edges eligible for reversal. Try adjusting probabilities.")
target_edge <- candidates_for_reversal[which(candidates_for_reversal$weight ==
max(candidates_for_reversal$weight))][1]
g <- reverse_edge(g, target_edge)
E(g)$weight <- E(g_init)$weight
reversed_edges <- c(reversed_edges, target_edge)
}
get.edgelist(g) %>%
set_colnames(c("from", "to"))
}
#' Convert a DAG to an array of strings representing v-structures
#' Concatenates all the v-structures in a DAG into an array of strings. Useful for counting equivalence classes.
#' For a v-structure of X->Z<-Y, returns an element Z~X,Y, where the order of X and Y is determined
#' by a sort.
#' @export
v_array <- function(net){
mat <- vstructs(net, moral = FALSE)
apply(mat, 1, function(vs){
Ps <- sort(c(vs[1], vs[3]))
paste0(vs[2], "~", Ps[1],",", Ps[2])
})
}
#' @rdname v_array
#' @export
v_string <- function(net){
v_array(net) %>%
sort %>%
paste(collapse = ";")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.