R/information_theory.R

#' Calculate entropy from a probability distribution of a random variable.
#'
#' @param prob_dist a vector corresponding to a random variable.
#' @param base base for the logarithm used in the entropy calculation
#' @export
entropy <- function(prob_dist, base=exp(1)){
  #Calculates entropy given a numeric corresponding to probabilities in a discrete distribution
  prob_dist <- bayes_correction(prob_dist)
  prob_dist <- prob_dist/sum(prob_dist)
  -sum(log(prob_dist, base=base) * prob_dist)
}

#' Query a Bayesian Network to calculate conditional entropy of one node given other nodes
#'
#' Calculates the conditional entropy of a node given other nodes in a Bayesian network.
#' Only works for discrete valued distributions fitted on data where variables are
#' non-ordered factors class ("bn.fit.dnet").  Uses gRain package to convert to query
#' the Bayesian network.
#' @param node name of the node of interest
#' @param vector of node names to condition on.
#' @param bn_model a fitted Bayesian network, an object of the class bn.fit
#' @param base the base used in entropy's logarithm calculation. By setting base equal to the number
#' of levels of the node variable, maximum entropy will be 1.  Set to 2 for entropy units in bits.
#' @export
conditional_entropy_query <- function(node, conditioning_nodes, bn_model, base=exp(1)){
  all_nodes <- nodes(bn_model)
  if(!(node %in% all_nodes)) stop("incorrect node name")
  if(!(all(conditioning_nodes %in% all_nodes))) stop("some of the conditioning node names are not in the network")
  level_list <- lapply(conditioning_nodes, function(cond_node) unique(rownames(bn_model[[cond_node]]$prob)))
  cond_node_combinations <- expand.grid(level_list, stringsAsFactors = FALSE)
  jtree <- compile(as.grain(bn_model))
  sum(apply(cond_node_combinations, 1, function(row){
    evidence <- setFinding(jtree, nodes = conditioning_nodes, states = as.character(row))
    conditioned_distribution <- querygrain(evidence, nodes = node)[[node]]
    combo_prob <- pEvidence(evidence)
    entropy(conditioned_distribution, base = base) * combo_prob
  }))
}

#' Query a Bayesian Network to calculate conditional entropy of a node's conditional probability distribution
#'
#' Calculates the conditional entropy of a node given its conditional probability distribution in a
#' in a fitted bayesian network object from the BNLearn package.  Only works for discrete valued distributions fitted on data where variables are
#' non-ordered factors class ("bn.fit.dnet").  Uses gRain package to convert to query the Bayesian network.
#' @param node name of the node of interest
#' @param bn_model a fitted Bayesian network, an object of the class bn.fit
#' @param base the base used in entropy's logarithm calculation. By setting base equal to the number
#' of levels of the node variable, maximum entropy will be 1.  Set to 2 for entropy units in bits.
#' @export
cpd_entropy_query <- function(node, bn_model,  base=exp(1)){
  node_parents <- bnlearn::parents(bn_model, node)
  if(length(node_parents) == 0){ # Return ordinary entropy if there are no parents.
    cpd <- bn_model[[node]]$prob
    return(entropy(cpd, base))
  }
  conditional_entropy_query(node, node_parents, bn_model, base = base)
}

#' Bayes Correction
#'
#' Used in calculation of averaging entropy.
bayes_correction <- function(p, e = 1e-6){
  p <- ifelse(p == 1, 1 - e, p)
  p <- ifelse(p == 0,  0 + e, p)
  p
}

#' Reduce Averaging
#'
#' Model averaging results (class \emph{bn.strength}) contain two rows for each edge, one for each edge direction.
#' This reduces results to just one row per edge.  Used in averaging_entropy.
#' @export
reduce_averaging <- function(averaging, node_names){
  arcs2names(averaging[, c(1:2)], directed_edges = FALSE) %>%
  {!duplicated(.)} %>% # pull the non-duplicated edges
  {averaging[., ]} %>% # index edges by those that are not duplicated.
  set_rownames(NULL)
}

bayes_log <- Vectorize(function(x){
  if(x == 0) x <- .Machine$double.xmin
  log2(x)
})

#' Calculate entropy on model averaging results
#'
#' Model averaging results, of the class "bn.learn" provide estimates of the probabilities the
#' true network structure includes a given edge.  This function calculates information entropy on those estimates.
#' Entropy for each edge is calculated, then the average over all edges in the model averaging results is return.
#' mean averaging entropy.
#' @param strength The results of model averaging, class \emph{bn.strength},
#' see functions \code{\link[bnlearn]{boot.strength}} and \code{\link[bnlearn]{custom.strength}}
#' @param presence_only calculate entropy only for edge presence (either direction),
#' @return a modified data frame with the entropy added as a column.
#' @export
edge_entropy <- function(strength, presence_only = FALSE){
  if(presence_only){
    result <- mutate(strength,
                      entropy = -bayes_log(strength) * strength +
                                -bayes_log(1 - strength) * (1 - strength)
                     )
    result$entropy
  }else{
    result <- mutate(strength,
                     prob = strength * direction, # P(A -> B)
                     prob_rev = strength * (1 - direction), # P(A <- b)
                     prob_none = 1 - strength, # P(A  b)
                     entropy = -bayes_log(prob) * prob +
                               -bayes_log(prob_rev) * prob_rev +
                               -bayes_log(prob_none) * (prob_none)
                     )
  }
  result$entropy
}

#' @export
orientation_entropy <- function(bn_strength){
  arc_strength <- bn_strength$strength
  arc_direction <- bn_strength$direction
  arc_strength * ( -bayes_log(arc_direction) * arc_direction +
                   -bayes_log(1 - arc_direction) * (1 - arc_direction))
}

#' L1 edge error
#' L1 edge error calculated on a strength object, given a ground truth network.
#' L1 edge error is described in Tong and Koller (2001) and Murphy (2001).
#' @param net ground truth network
#' @param bn_strength an object of class bn.strength (*bnlearn* package).
#' @export
l1_error <- function(bn_strength, net){
  ground_truth_arcs <- arcs2names(arcs(net))
  result <- reduce_averaging(bn_strength) %>%
    mutate(name = arcs2names(.[, c(1, 2)]),
           name_rev = arcs2names(.[, c(2, 1)]),
           present = ifelse(name %in% ground_truth_arcs, TRUE, FALSE),
           present_rev = ifelse(name_rev %in% ground_truth_arcs, TRUE, FALSE),
           absent = present + present_rev == 0,
           prob = strength * direction,
           prob_rev = strength * (1 - direction),
           prob_absent = 1 - prob - prob_rev,
           L1 = present * (1 - prob) + present_rev * (1 - prob_rev) + absent * (1 - prob_absent))
  sum(result$L1)
}
robertness/bninfo documentation built on May 27, 2019, 10:32 a.m.