## evaluate an event, given either as an unevaluated expression or as a list
## of values, against the samples in "data".
eval.event = function(event, data) {

  frame = parent.frame()

  if (identical(event, TRUE))
    TRUE
  else if (is.language(event))
    eval(event, data, frame)
  else { # should be a list or a data frame.

    r = TRUE
    nms = names(event)
    for (i in seq_along(event))  ## do not iterate over names, they can be repeated!
      r = r & (data[[nms[[i]]]] %in% event[[i]])
    r

  }#ELSE

}#EVAL.EVENT
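
# illustrative usage sketch (not part of the backend, commented out so that
# sourcing this file has no side effects), on a hypothetical toy data frame:
#   ex.df = data.frame(A = factor(c("a", "b", "a")), B = factor(c("x", "x", "y")))
#   eval.event(quote(A == "a"), ex.df)                 # expression: TRUE FALSE TRUE
#   eval.event(list(A = "a", B = c("x", "y")), ex.df)  # list: TRUE FALSE TRUE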
# check that an event or evidence clause evaluated to a well-formed logical vector.
check.logical_event = function(le, n, type) {

  if (!is.logical(le))
    stop(type, " must evaluate to a logical vector.")
  if (!(length(le) == 1L || length(le) == n))
    stop("logical vector for ", type, " is of length ", length(le),
      " instead of ", n, ".")

}#CHECK.LOGICAL_EVENT
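
# for example, check.logical_event(c(TRUE, NA, FALSE), 3, "evidence") passes
# silently, while check.logical_event(1:3, 3, "evidence") stops with an error
# because the vector is not logical.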
# backend for conditional probability queries.
conditional.probability.query = function(fitted, event, evidence, method,
    extra, probability = TRUE, cluster = NULL, debug = FALSE) {

  # consider only the upper closure of event and evidence to reduce the number
  # of variables in the Monte Carlo simulation.
  fitted = reduce.fitted(fitted = fitted, event = event, evidence = evidence,
             nodes = extra$query.nodes, method = method, debug = debug)

  if (method %in% c("ls", "lw")) {

    if (method == "ls") {

      sampling = logic.sampling
      distribution = logic.distribution

    }#THEN
    else if (method == "lw") {

      sampling = weighting.sampling
      distribution = weighting.distribution

    }#ELSE

    if (!is.null(cluster)) {

      # get the number of slaves.
      s = nSlaves(cluster)
      # divide the number of particles among the slaves.
      batch = n = ceiling(extra$n / s)

      if (probability) {

        results = parSapply(cluster, seq(s),
          function(x) {
            sampling(fitted = fitted, event = event, evidence = evidence,
              n = n, batch = batch, debug = debug)
          })

        return(mean(results))

      }#THEN
      else {

        results = parLapply(cluster, seq(s),
          function(x) {
            distribution(fitted = fitted, nodes = event, evidence = evidence,
              n = n, batch = batch, debug = debug)
          })

        return(do.call(rbind, results))

      }#ELSE

    }#THEN
    else {

      if (probability) {

        sampling(fitted = fitted, event = event, evidence = evidence,
          n = extra$n, batch = extra$batch, debug = debug)

      }#THEN
      else {

        distribution(fitted = fitted, nodes = event, evidence = evidence,
          n = extra$n, batch = extra$batch, debug = debug)

      }#ELSE

    }#ELSE

  }#THEN

}#CONDITIONAL.PROBABILITY.QUERY
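
# usage sketch: this backend sits behind the public cpquery() and cpdist()
# functions, roughly as follows (illustrative, assuming a bn.fit object named
# fitted.network with discrete nodes A and B):
#   cpquery(fitted.network, event = (A == "a"), evidence = (B == "b"),
#     method = "ls")                          # probability = TRUE path.
#   cpdist(fitted.network, nodes = "A", evidence = (B == "b"),
#     method = "ls")                          # probability = FALSE path.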
# create an empty data frame from a bn.fit object.
fit.dummy.df = function(fitted, nodes) {

  dummy = sapply(nodes, function(x) {

    node = fitted[[x]]

    if (is(node, "bn.fit.dnode"))
      return(factor(character(0), levels = dimnames(node$prob)[[1]]))
    else if (is(node, "bn.fit.onode"))
      return(ordered(character(0), levels = dimnames(node$prob)[[1]]))
    else if (is(node, "bn.fit.gnode"))
      return(numeric(0))

  })

  return(minimal.data.frame(dummy))

}#FIT.DUMMY.DF
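
# the zero-row data frame built here lets reduce.fitted() below type-check
# event and evidence expressions without generating any samples; a minimal
# sketch of the same idea in plain R, with a hypothetical factor column A:
#   dummy = data.frame(A = factor(character(0), levels = c("a", "b")))
#   try(eval(quote(A == "a"), dummy), silent = TRUE)  # logical(0): well-formed.
#   try(eval(quote(Z == "a"), dummy), silent = TRUE)  # try-error: unknown node.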
# reduce a bn.fit object to the upper closure of the event and evidence nodes.
reduce.fitted = function(fitted, event, evidence, nodes, method, debug) {

  if (is.null(nodes)) {

    # find out which nodes are involved in the event and the evidence and
    # construct their upper closure.
    nodes = names(fitted)
    nodes.event = nodes[nodes %in% explode(event)]
    nodes.evidence = nodes[nodes %in% explode(evidence)]
    upper.closure = schedule(fitted, start = union(nodes.evidence, nodes.event),
                      reverse = TRUE)

    # check whether something went horribly wrong in getting the node labels
    # and the upper closure.
    if (((length(upper.closure) == 0) || !all(upper.closure %in% nodes)) ||
        (!identical(evidence, TRUE) && (length(nodes.evidence) == 0)) ||
        (!identical(event, TRUE) && (length(nodes.event) == 0))) {

      # use all the nodes when in doubt.
      event = evidence = TRUE
      upper.closure = nodes

      if (debug) {

        cat("* checking which nodes are needed.\n")
        cat("  > unable to identify query nodes.\n")

      }#THEN

    }#THEN
    else {

      if (debug) {

        cat("* checking which nodes are needed.\n")
        cat("  > event involves the following nodes:", nodes.event, "\n")
        cat("  > evidence involves the following nodes:", nodes.evidence, "\n")
        cat("  > upper closure is '", upper.closure, "'\n")

      }#THEN

    }#ELSE

  }#THEN
  else {

    # construct the upper closure of the query nodes.
    upper.closure = schedule(fitted, start = nodes, reverse = TRUE)

    if (debug) {

      cat("* using specified query nodes.\n")
      cat("  > upper closure is '", upper.closure, "'\n")

    }#THEN

  }#ELSE

  # check whether the upper closure is correct: tricky expressions are not
  # always handled correctly by explode().
  dummy = fit.dummy.df(fitted, upper.closure)

  # test the evidence (TRUE or an expression in LS, a list in LW).
  if (!(is.language(evidence) || identical(evidence, TRUE)))
    try.evidence = TRUE
  else
    try.evidence = try(eval(evidence, dummy), silent = TRUE)
  # test the event (the node labels in cpdist; TRUE or an expression
  # in cpquery).
  if (!(is.language(event) || identical(event, TRUE)))
    try.event = TRUE
  else
    try.event = try(eval(event, dummy), silent = TRUE)

  # create the subgraph corresponding to the upper closure.
  if (is.logical(try.event) && is.logical(try.evidence)) {

    if (debug)
      cat("  > generating observations from", length(upper.closure), "/",
        length(fitted), "nodes.\n")

    fitted = fitted[upper.closure]
    class(fitted) = "bn.fit"

  }#THEN
  else {

    if (debug)
      cat("  > unable to use the upper closure, using the whole network.\n")

  }#ELSE

  return(fitted)

}#REDUCE.FITTED
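
# for intuition: in a chain network A -> B -> C, a query involving only A and
# B has upper closure {A, B}, so C is never simulated. a hypothetical check
# through the public API:
#   dag = model2network("[A][B|A][C|B]")
#   # after fitting, cpquery(..., event = (B == "b"), evidence = (A == "a"))
#   # only needs samples for A and B.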
# compute conditional probabilities with forward/logic sampling.
logic.sampling = function(fitted, event, evidence, n, batch, debug = FALSE) {

  cpxe = cpe = 0L
  filtered = logical(n)
  matching = logical(n)
  r = logical(n)

  # count how many complete batches we have to generate.
  nbatches = n %/% batch
  # count how many observations are in the last one.
  last.one = n %% batch

  for (m in c(rep(batch, nbatches), last.one)) {

    # do a hard reset of generated.data, so that the memory used by the data
    # set generated in the previous iteration can be garbage-collected and
    # reused _before_ rbn() returns.
    generated.data = NULL

    # generate random data from the Bayesian network.
    if (m > 0)
      generated.data = rbn.backend(x = fitted, n = m)
    else
      break

    if (debug)
      cat("* generated", m, "samples from the Bayesian network.\n")

    # evaluate the expression defining the evidence.
    r = eval.event(evidence, generated.data)
    check.logical_event(r, m, "evidence")
    # filter out the samples not matching the evidence we assume.
    filtered = r & !is.na(r)
    # update the global counters.
    cpe = cpe + length(which(filtered))

    if (debug) {

      lwfilter = length(which(filtered))

      if (!identical(evidence, TRUE))
        cat("  > evidence matches ", lwfilter, " samples out of ", m,
          " (p = ", lwfilter/m, ").\n", sep = "")
      else
        cat("  > evidence matches ", m, " samples out of ", m,
          " (p = 1).\n", sep = "")

    }#THEN

    # evaluate the expression defining the event.
    r = eval.event(event, generated.data)
    check.logical_event(r, m, "event")
    # filter out the samples not matching the event we are looking for.
    matching = filtered & r & !is.na(r)
    # update the global counters.
    cpxe = cpxe + length(which(matching))

    if (debug) {

      lwmatch = length(which(matching))
      lwratio = ifelse(lwfilter == 0, 0, lwmatch/lwfilter)

      if (!identical(event, TRUE))
        cat("  > event matches ", lwmatch, " samples out of ", lwfilter,
          " (p = ", lwratio, ").\n", sep = "")
      else
        cat("  > event matches ", lwfilter, " samples out of ", lwfilter,
          " (p = 1).\n", sep = "")

    }#THEN

  }#FOR

  # prevent divide-by-zero errors.
  result = ifelse(cpe == 0, 0, cpxe / cpe)

  if (debug && (nbatches > 1)) {

    cat("* generated a grand total of", n, "samples.\n")
    cat("  > event matches ", cpxe, " samples out of ", cpe,
      " (p = ", result, ").\n", sep = "")

  }#THEN

  return(result)

}#LOGIC.SAMPLING
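
# the estimator above is plain rejection sampling: count the samples matching
# both event and evidence, then divide by those matching the evidence alone.
# a self-contained sketch with hypothetical logical vectors ee (event) and
# ev (evidence) over the same simulated samples:
#   rejection.estimate = function(ee, ev) {
#     ok = ev & !is.na(ev)
#     if (sum(ok) == 0) 0 else sum(ee & ok, na.rm = TRUE) / sum(ok)
#   }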
# generate random observations from conditional distributions with forward/logic
# sampling.
logic.distribution = function(fitted, nodes, evidence, n, batch, debug = FALSE) {

  filtered = logical(n)
  result = NULL

  # count how many complete batches we have to generate.
  nbatches = n %/% batch
  # count how many observations are in the last one.
  last.one = n %% batch

  for (m in c(rep(batch, nbatches), last.one)) {

    # do a hard reset of generated.data, so that the memory used by the data
    # set generated in the previous iteration can be garbage-collected and
    # reused _before_ rbn() returns.
    generated.data = NULL

    # generate random data from the Bayesian network.
    if (m > 0)
      generated.data = rbn.backend(x = fitted, n = m)
    else
      break

    if (debug)
      cat("* generated", m, "samples from the Bayesian network.\n")

    # evaluate the expression defining the evidence and double-check that it
    # produced a well-formed logical vector.
    r = eval.event(evidence, generated.data)
    check.logical_event(r, m, "evidence")
    # filter out the samples not matching the evidence we assume.
    filtered = r & !is.na(r)

    if (debug) {

      lwfilter = length(which(filtered))

      if (!identical(evidence, TRUE))
        cat("  > evidence matches ", lwfilter, " samples out of ", m,
          " (p = ", lwfilter/m, ").\n", sep = "")
      else
        cat("  > evidence matches ", m, " samples out of ", m,
          " (p = 1).\n", sep = "")

    }#THEN

    # update the return value.
    result = rbind(result, generated.data[filtered, nodes, drop = FALSE])

  }#FOR

  # reset the row names.
  rownames(result) = NULL

  if (debug && (nbatches > 1))
    cat("* generated a grand total of", n, "samples.\n")

  return(result)

}#LOGIC.DISTRIBUTION
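
# usage sketch: this is the method = "ls" path of the public cpdist(), e.g.
# (illustrative, assuming a bn.fit object named fitted.network):
#   cpdist(fitted.network, nodes = "A", evidence = (B == "b"), method = "ls")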
# compute conditional probabilities with likelihood weighting.
weighting.sampling = function(fitted, event, evidence, n, batch, debug = FALSE) {

  cpxe = cpe = 0
  matching = logical(n)
  r = logical(n)

  # count how many complete batches we have to generate.
  nbatches = n %/% batch
  # count how many observations are in the last one.
  last.one = n %% batch

  weights = function(data) {

    if (isTRUE(evidence))
      return(rep(1, nrow(data)))
    else
      .Call("lw_weights",
            fitted = fitted,
            data = data,
            by.sample = TRUE,
            keep = names(evidence),
            debug = debug)

  }#WEIGHTS

  for (m in c(rep(batch, nbatches), last.one)) {

    # do a hard reset of generated.data, so that the memory used by the data
    # set generated in the previous iteration can be garbage-collected and
    # reused _before_ rbn() returns.
    generated.data = NULL

    # generate random data from the Bayesian network, with the evidence nodes
    # clamped to their observed values.
    if (m > 0)
      generated.data = rbn.backend(x = fitted, fix = evidence, n = m)
    else
      break

    if (debug)
      cat("* generated", m, "samples from the Bayesian network.\n")

    # evaluate the expression defining the event.
    r = eval.event(event, generated.data)
    check.logical_event(r, m, "event")
    # filter out the samples not matching the event we are looking for.
    matching = r & !is.na(r)

    # compute the likelihood weights and accumulate them.
    w = weights(generated.data)
    cpe = cpe + sum(w)
    cpxe = cpxe + sum(w[matching])

    if (debug)
      cat("  > event has a probability mass of ", cpxe, " out of ", cpe,
        ".\n", sep = "")

  }#FOR

  # compute the conditional probability.
  result = cpxe / cpe

  if (debug && (nbatches > 1)) {

    cat("* generated a grand total of", n, "samples.\n")
    cat("  > event has a probability mass of ", cpxe, " out of ", cpe,
      " (p = ", result, ").\n", sep = "")

  }#THEN

  return(result)

}#WEIGHTING.SAMPLING
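
# with likelihood weighting the estimate is a ratio of weight sums rather than
# a ratio of counts: samples are generated with the evidence clamped, and each
# contributes its likelihood weight. a self-contained sketch, given a
# hypothetical weight vector w and event indicator ee:
#   lw.estimate = function(ee, w) sum(w[ee & !is.na(ee)]) / sum(w)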
# generate random observations from conditional distributions with likelihood
# weighting.
weighting.distribution = function(fitted, nodes, evidence, n, batch, debug = FALSE) {

  result = NULL

  # count how many complete batches we have to generate.
  nbatches = n %/% batch
  # count how many observations are in the last one.
  last.one = n %% batch

  for (m in c(rep(batch, nbatches), last.one)) {

    # do a hard reset of generated.data, so that the memory used by the data
    # set generated in the previous iteration can be garbage-collected and
    # reused _before_ rbn() returns.
    generated.data = NULL

    # generate random data from the Bayesian network, with the evidence nodes
    # clamped to their observed values.
    if (m > 0)
      generated.data = rbn.backend(x = fitted, fix = evidence, n = m)
    else
      break

    if (debug)
      cat("* generated", m, "samples from the Bayesian network.\n")

    # update the return value.
    result = rbind(result, generated.data[, nodes, drop = FALSE])

  }#FOR

  if (debug && (nbatches > 1))
    cat("* generated a grand total of", n, "samples.\n")

  return(result)

}#WEIGHTING.DISTRIBUTION
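
# usage sketch: this is the method = "lw" path of the public cpdist(), where
# the evidence is a named list of clamped values rather than an expression
# (illustrative, assuming a bn.fit object named fitted.network):
#   cpdist(fitted.network, nodes = "A", evidence = list(B = "b"), method = "lw")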