# Part of the "structmcmc" package, https://github.com/rjbgoudie/structmcmc
#
# This software is distributed under the GPL-3 license.  It is free,
# open source, and has the attribution requirements (GPL Section 7) in
#   https://github.com/rjbgoudie/structmcmc
#
# Note that it is required that attributions are retained with each function.
#
# Copyright 2008 Robert J. B. Goudie, University of Warwick
#' Posterior distribution on Bayesian networks.
#'
#' Use one of a number of methods to get the posterior distribution.
#'
#' @param data The data.
#' @param method One of "exact", "mc3", "gibbs", "mj-mcmc". "mh-mcmc" is a
#'   synonym of "mc3".
#' @param prior A list of functions of the same length as \code{initial}
#'   that returns the local prior score of the corresponding node, given a
#'   numeric vector of parents. The default value \code{NULL} uses an
#'   improper uniform prior.
#' @param logScoreFUN A list of four elements:
#'   \describe{
#'     \item{offline}{A function that computes the logScore of a Bayesian
#'                    Network}
#'     \item{online}{A function that incrementally computes the logScore of a
#'                   Bayesian Network}
#'     \item{local}{A function that computes the local logScore of a
#'                  Bayesian Network}
#'     \item{prepare}{A function that prepares the data, and any further
#'                    pre-computation required by the logScore functions.}
#'   }
#'   For Multinomial-Dirichlet models, \code{\link{logScoreMultDirFUN}}
#'   returns the appropriate list; for Normal models with Zellner g-priors,
#'   \code{\link{logScoreNormalFUN}} returns the appropriate list.
#' @param logScoreParameters A list of parameters that are passed to
#'                       logScoreFUN.
#' @param constraint A matrix of dimension ncol(data) x ncol(data) giving
#'                       constraints to the sample space.
#'                       The (i, j) element is
#'                         1  if the edge i -> j is required
#'                         -1 if the edge i -> is excluded.
#'                         0  if the edge i -> j is not constrained.
#'                       The diagonal of constraint must be all 0.
#' @param maxNumberParents Integer of length 1. The maximum number of
#'   parents of any node. Default value is left to the MCMC sampler when
#'   \code{\link{mcmcposterior}} is user, or \code{\link{exactposterior}} for
#'   exact computation.
#' @param nSamples The number of samples to be draw. Set this to \code{FALSE}
#'   if using the \code{time} argument. (Only applies to MCMC.)
#' @param time The number of seconds to spend drawing samples. Set this to
#'   \code{FALSE} if using the \code{nSamples} argument. (Only applies to
#'   MCMC.)
#' @param nBurnin The number of samples to discard from the beginning of
#'   the sample. (Only applies to MCMC.)
#' @param initial An object of class 'bn'. The starting value of the
#'   MCMC. (Only applies to MCMC.)
#' @param verbose A logical. Should a progress bar be displayed?
#' @return Either a \code{bnpost} or a \code{bnpostmcmc} object.
#' @export
#' @seealso For more control, use the MCMC sampler directly,
#'   e.g. \code{\link{BNSampler}}.  Example priors
#'   \code{\link{priorGraph}}, \code{\link{priorUniform}}.
#' @examples
#' x1 <- factor(c("a", "a", "g", "c", "c", "a", "g", "a", "a"))
#' x2 <- factor(c(2, 2, 4, 3, 1, 4, 4, 4, 1))
#' x3 <- factor(c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE))
#' x <- data.frame(x1 = x1, x2 = x2, x3 = x3)
#'
#' set.seed(1234)
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#' ep(mcmc)
posterior <- function(data,
                      method             = "mc3",
                      prior              = priorUniform(initial),
                      logScoreFUN        = logScoreMultDirFUN(),
                      logScoreParameters = list(hyperparameters = "bdeu"),
                      constraint         = NULL,
                      maxNumberParents   = NULL,
                      nSamples           = 50000,
                      time               = F,
                      nBurnin            = 10000,
                      initial            = empty(ncol(data), "bn"),
                      verbose            = T){
  methods <- c("exact", "mc3", "mh-mcmc", "gibbs", "mj-mcmc")
  stopifnot(method      %in% methods)
  if (method == "exact"){
    exactposterior(data,
                   prior,
                   logScoreFUN,
                   logScoreParameters,
                   constraint,
                   maxNumberParents,
                   verbose)
  } else if (method == "mc3" || method == "mh-mcmc"){
    mcmcposterior(data,
                  sampler = BNSampler,
                  prior,
                  logScoreFUN,
                  logScoreParameters,
                  constraint,
                  maxNumberParents,
                  nSamples,
                  time,
                  nBurnin,
                  initial,
                  verbose)
  } else if (method == "gibbs") {
    mcmcposterior(data,
                  sampler = BNGibbsSampler,
                  prior,
                  logScoreFUN,
                  logScoreParameters,
                  constraint,
                  maxNumberParents,
                  nSamples,
                  time,
                  nBurnin,
                  initial,
                  verbose)
  } else if (method == "mj-mcmc") {
    mcmcposterior(data,
                  sampler = BNSamplerMJ,
                  prior,
                  logScoreFUN,
                  logScoreParameters,
                  constraint,
                  maxNumberParents,
                  nSamples,
                  time,
                  nBurnin,
                  initial,
                  verbose)
  } else {
    stop("Not implemented")
  }
}
#' Posterior distribution on Bayesian networks.
#'
#' Use one of a number of methods to get the posterior distribution
#'
#' @param data The data.
#' @param prior A list of functions of the same length as \code{initial}
#'   that returns the local prior score of the corresponding node, given a
#'   numeric vector of parents. The default value \code{NULL} uses an
#'   improper uniform prior.
#' @param logScoreFUN A list of four elements:
#'   \describe{
#'     \item{offline}{A function that computes the logScore of a Bayesian
#'                    Network}
#'     \item{online}{A function that incrementally computes the logScore of a
#'                   Bayesian Network}
#'     \item{local}{A function that computes the local logScore of a
#'                  Bayesian Network}
#'     \item{prepare}{A function that prepares the data, and any further
#'                    pre-computation required by the logScore functions.}
#'   }
#' @param logScoreParameters A list of parameters that are passed to
#'                       logScoreFUN.
#' @param constraint A matrix of dimension ncol(data) x ncol(data) giving
#'                       constraints to the sample space.
#'                       The (i, j) element is
#'                         1  if the edge i -> j is required
#'                         -1 if the edge i -> is excluded.
#'                         0  if the edge i -> j is not constrained.
#'                       The diagonal of constraint must be all 0.
#' @param maxNumberParents Integer of length 1. The maximum number of
#'   parents of any node. A \code{NULL} value uses the \code{ncol(data) - 1}.
#' @param verbose A logical. Should a progress bar be displayed?
#' @return A \code{bnpost} object.
#' @export
#' @seealso \code{\link{posterior}}. Example priors
#'   \code{\link{priorGraph}}, \code{\link{priorUniform}}.
exactposterior <- function(data,
                           prior              =
                             priorUniform(empty(ncol(data), "bn")),
                           logScoreFUN        = logScoreMultDirFUN(),
                           logScoreParameters = list(hyperparameters = "bdeu"),
                           constraint         = NULL,
                           maxNumberParents   = NULL,
                           verbose            = T){
  stopifnot(is.valid.localPriors(prior) || is.function(prior))
  if (!is.function(prior)){
    localPriors <- prior
    prior <- function(x){
      eval.prior(x, localPriors)
    }
  }
  if (is.null(maxNumberParents)){
    maxNumberParents <- ncol(data) - 1
  }
  nVar <- ncol(data)
  bnspace <- enumerateBNSpace(nVar)
  if (!is.null(constraint)){
    bnspace <- Filter(satisfiesConstraint, bnspace)
  }
  nm <- colnames(data)
  bnspace <- lapply(bnspace, function(x){
    names(x) <- nm
    x
  })
  class(bnspace) <- c("bn.list", "parental.list")
  logScoreOfflineFUN <- logScoreFUN$offline
  prepareDataFUN <- logScoreFUN$prepare
  logScoreParameters <- prepareDataFUN(data,
                                       logScoreParameters,
                                       checkInput = F)
  if (isTRUE(verbose)){
    progress <- txtProgressBar(max = length(bnspace), style = 3)
    setTxtProgressBar(progress, 0)
  }
  prog <- 0
  lsmd <- sapply(bnspace, function(x){
    if (isTRUE(verbose)){
      prog <<- prog + 1
      setTxtProgressBar(progress, prog)
    }
    logScoreOfflineFUN(x                  = x,
                       logScoreParameters = logScoreParameters)
  })
  if (isTRUE(verbose)){
    close(progress)
  }
  logpriors <- log(sapply(bnspace, prior))
  logScore <- lsmd + logpriors
  bnpost(bnspace     = bnspace,
         logScore    = logScore,
         data        = data,
         logScoreFUN = function(x) stop("error"))
}
#' Posterior distribution on Bayesian networks.
#'
#' Use MCMC to approximate the posterior distribution
#'
#' @param data The data.
#' @param sampler A BNSampler. eg BNSampler or BNGibbsSampler etc
#' @param prior A list of functions of the same length as \code{initial}
#'   that returns the local prior score of the corresponding node, given a
#'   numeric vector of parents. The default value \code{NULL} uses an
#'   improper uniform prior.
#' @param logScoreFUN A list of four elements:
#'   \describe{
#'     \item{offline}{A function that computes the logScore of a Bayesian
#'                    Network}
#'     \item{online}{A function that incrementally computes the logScore of a
#'                   Bayesian Network}
#'     \item{local}{A function that computes the local logScore of a
#'                  Bayesian Network}
#'     \item{prepare}{A function that prepares the data, and any further
#'                    pre-computation required by the logScore functions.}
#'   }
#' @param logScoreParameters A list of parameters that are passed to
#'                       logScoreFUN.
#' @param constraint A matrix of dimension ncol(data) x ncol(data) giving
#'                       constraints to the sample space.
#'                       The (i, j) element is
#'                         1  if the edge i -> j is required
#'                         -1 if the edge i -> is excluded.
#'                         0  if the edge i -> j is not constrained.
#'                       The diagonal of constraint must be all 0.
#' @param maxNumberParents Integer of length 1. The maximum number of
#'   parents of any node.
#' @param nSamples The number of samples to be draw. Set this to \code{FALSE}
#'   if using the \code{time} argument.
#' @param time The number of seconds to spend drawing samples. Set this to
#'   \code{FALSE} if using the \code{nSamples} argument.
#' @param nBurnin The number of samples to discard from the beginning of
#'   the sample.
#' @param initial An object of class 'bn'. The starting value of the
#'                       MCMC.
#' @param verbose A logical. Should a progress bar be displayed?
#' @return A \code{bnpostmcmc} object.
#' @seealso For more control, use the MCMC sampler directly,
#'   e.g. \code{\link{BNSampler}}. See also \code{\link{posterior}}.
#'    Example priors \code{\link{priorGraph}}, \code{\link{priorUniform}}.
#' @export
mcmcposterior <- function(data,
                          sampler            = BNSampler,
                          prior              = priorUniform(initial),
                          logScoreFUN        = logScoreMultDirFUN(),
                          logScoreParameters = list(hyperparameters = "bdeu"),
                          constraint         = NULL,
                          maxNumberParents   = NULL,
                          nSamples           = 50000,
                          time               = F,
                          nBurnin            = 10000,
                          initial            = empty(ncol(data), "bn"),
                          verbose            = T){
  stopifnot(identical(nSamples, F) + identical(time, F) == 1)
  nVar <- ncol(data)
  if (is.null(initial)){
    initial <- empty(nVar, class = "bn")
  }
  sampler <- sampler(data               = data,
                     initial            = initial,
                     prior              = prior,
                     logScoreFUN        = logScoreFUN,
                     logScoreParameters = logScoreParameters,
                     constraint         = constraint,
                     maxNumberParents   = maxNumberParents,
                     verbose            = verbose)
  samples <- draw(sampler = sampler,
                  n       = nSamples,
                  time    = time,
                  burnin  = nBurnin,
                  verbose = verbose)
  bnpostmcmc(sampler     = sampler,
             samples     = samples,
             logScoreFUN = logScoreFUN)
}
#' Posterior graph probabilities.
#'
#' method description
#'
#' @param x ...
#' @param ... Further arguments passed to method
#' @seealso \code{\link{summary.gp}},
#'   \code{\link{gp.bnpostmcmc}}, \code{\link{gp.bnpost}},
#'   \code{\link{gp.bnpostmcmc.list}}, \code{\link{gp.list}}
#' @export
gp <- function(x, ...){
  UseMethod("gp")
}
#' Posterior edge probabilities.
#'
#' Compute the posterior edge probabilities, as given by the supplied
#' object.
#'
#' @param x An object
#' @param ... Further arguments, passed to method
#' @export
#' @seealso \code{\link{ep.bnpostmcmc.list}}, \code{\link{ep.parental.list}},
#'   \code{\link{ep.bnpost}}, \code{\link{ep.table}},
#'   \code{\link{ep.parental.contingency}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' dat <- data.frame(x1 = x1, x2 = x2)
#'
#' initial <- bn(c(), c())
#' prior <- priorUniform(initial)
#'
#' sampler <- BNSampler(dat, initial, prior)
#' samples <- draw(sampler, n = 50)
#' mpost <- bnpostmcmc(sampler, samples)
#'
#' ep(mpost)
#'
#' initial <- bn(c(), c(1))
#' sampler2 <- BNSampler(dat, initial, prior)
#' samples2 <- draw(sampler2, n = 50)
#' mpost2 <- bnpostmcmc(sampler2, samples2)
#'
#' ep(bnpostmcmc.list(mpost, mpost2))
ep <- function(x, ...){
  UseMethod("ep")
}
#' Entropy.
#'
#' method description
#'
#' @param x ...
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{entropy.bnpost}}
entropy <- function(x, ...){
  UseMethod("entropy")
}
#' Maximum aposteriori graph.
#'
#' method description
#'
#' @param x ...
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{map.bnpost}}, \code{\link{map.bnpostmcmc}}
map <- function(x, ...){
  UseMethod("map")
}
#' Top posterior graph.
#'
#' method description
#'
#' @param x ...
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{top.bnpost}}, \code{\link{top.bnpostmcmc}}
top <- function(x, ...){
  UseMethod("top")
}
#' Bayes Factor.
#'
#' method description
#'
#' @param bn1 A bn
#' @param bn2 A bn
#' @param data data
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{bf.bnpostmcmc}}
bf <- function(bn1, bn2, data, ...){
  UseMethod("bf")
}
#' Posterior edge probabilities.
#'
#' Calculate edge probabilities given a list of
#' parental graphs (of class parental.list)
#'
#' @param x A parental.list
#' @param nbin The number of equally-sized bins into which parental.list into
#'          before computing the edge probabilities of each
#' @param start An integer of length 1, specifying the amount of burn-in.
#'          The samples start:end inclusive will be used.
#' @param end An integer of length 1, specifying the number of samples at the
#'          end to ignore. The samples start:end inclusive will be used.
#' @param verbose ...
#' @param ... Further arguments (unused)
#' @return if nbin == 1:
#'     A matrix of class 'ep' with entry (i,j) containing the probability of
#'     an edge from node i --> j
#'   if nbin > 1:
#'     A list of class ep.list, containing matrices as described above for
#'     each of the nbin bins into which the parental.list was split
#' @S3method ep parental.list
#' @method ep parental.list
#' @seealso \code{\link{ep}}
ep.parental.list <- function(x, nbin = 1, start = 1, end = length(x),
                             verbose = F, ...){
  stopifnot("parental.list" %in% class(x),
            isTRUE(is.wholenumber(nbin)),
            validStartEnd(start, end, length(x)))
  # remove burn-in etc
  x <- x[seq(from = start, to = end)]
  lengthOfRun <- length(x)
  binSeq <- seq_len(nbin)
  numberOfNodes <- length(x[[1]])
  nodeSeq <- seq_len(numberOfNodes)
  sizeOfBins <- lengthOfRun/nbin
  if (!isTRUE(is.wholenumber(sizeOfBins))){
    stop("The number of bins (nbin) must divide into the number of samples")
  }
  # flatten samples early for efficiency
  # and remove samples for memory efficiency
  if (verbose){
    cat("Flattening samples\n")
  }
  #flatsamples <- vector("list", length = lengthOfRun * numberOfNodes);
  x <- unlist(x, recursive = F, use.names = F);
  #x <- NULL
  extractSamples <- function(bin){
    # By flattening the data and picking out every numberOfNodes th
    # element, we get the appropriate vector that shows the parents of
    # head.
    # create a subsetting vector:
    # We have data of the form
    #
    # list(
    #   list(2, integer(0), 1),
    #   list(integer(0), 1, 1),
    #   list(integer(0), 1, 1),
    #   list(integer(0), integer(0), 1),
    #   list(integer(0), 1, 1),
    #   list(integer(0), integer(0), 1)
    # )
    #
    # if nbin = 3
    # then for bin = 2, head = 1
    # we want
    #
    # from = 2 * 3 * (2 - 1) + 1 = 6 + 1  = 7
    # to = 2 * 3 * (2 - 1) + 1 + 2 * 3 - 1 = 6 + 1 + 6 - 1 = 12
    # which gives
    # 7, 8, 9, 10, 11, 12
    # as required
    tabulateNULL <- function(bin, ...){
      if (is.null(bin)){
        rep.int(0, numberOfNodes)
      }
      else {
        tabulate(bin, ...)
      }
    }
    out <- matrix(ncol = numberOfNodes, nrow = numberOfNodes)
    toTabulate <- vector("numeric", length = lengthOfRun * numberOfNodes)
    if (verbose){
      cat("Iterating over nodes\n")
    }
    for (head in nodeSeq){
      base <- sizeOfBins * numberOfNodes * (bin - 1) + head
      select <- seq.int(
        from = base,
        to = base + (sizeOfBins - 1) * numberOfNodes,
        by = numberOfNodes
      )
      if (verbose){
        cat("Unlisting data\n")
      }
      toTabulate <- unlist(x[select],
                           recursive = F,
                           use.names = F)
      # use tabulateNULL because if head never has any parents in this bin,
      # then flatten will contain a NULL and tabulate throws an error on NULL.
      # Place the output in every numberOfNodes th column
      if (verbose){
        cat("Tabulating\n")
      }
      out[, head] <- tabulateNULL(toTabulate, nbins = numberOfNodes)
    }
    epmx <- out/sizeOfBins
    class(epmx) <- c("ep", "matrix")
    epmx
  }
  if (verbose){
    cat("Extracting samples\n")
  }
  epl <- lapply(binSeq, extractSamples)
  if (nbin == 1){
    eparr <- epl[[1]]
    class(eparr) <- c("ep", "matrix")
    eparr
  }
  else {
    class(epl) <- "ep.list"
    epl
  }
}
#' Computes the edge probabilities implied by a table.
#'
#' Computes the edge probabilities implied by a table.
#'
#' @param x An object of class 'table'. The table should have names that
#'   can be converted into objects of class parental using
#'   \code{as.parental.character}. ie they should be serializations of
#'   parental objects.
#' @param verbose A logical indicating whether progress should be shown.
#' @param ... Further arguments passed to
#'   \code{\link{ep.parental.contingency}}
#' @return A matrix of class 'ep' with entry (i,j) containing the probability
#'   of an edge from node i --> j
#' @S3method ep table
#' @method ep table
#' @seealso \code{\link{ep}}
ep.table <- function(x, verbose = F, ...){
  stopifnot(class(x) == "table")
  tablenames <- names(x)
  if (verbose) cat("Converting names to parental\n")
  tablebn <- as.parental(tablenames)
  pc <- list(parental.list = tablebn,
             contingency   = as.numeric(x))
  class(pc) <- "parental.contingency"
  ep(pc, verbose = verbose, ...)
}
#' Posterior edge probabilities.
#'
#' Computes the edge probabilities from a \code{parental.contingency},
#' which is a list, whose first component is \code{parental.list}, and
#' whose second component is a contingency table of counts of the
#' corresponding element of the \code{parental.list}. The contingency
#' table must simply be a numeric vector. The first component must be named
#' \code{parental.list}, and the second \code{contingency}.
#'
#' @param x An object of class \code{parental.contingency}, of the form
#'   desribed in the Details section.
#' @param FUN A function to be applied to the samples before the edge
#'   probabilities are computed. The function should accept a vector
#'   of objects of class 'parental.list' and return the transformed
#'   parental objects as a single 'parental.list'. If the function
#'   does not return a 'parental.list', an error will be thrown. Can be
#'   supplied as a function, symbol or character string (see
#'   \code{\link[base]{match.fun}})
#' @param verbose A logical indicating whether progress should be shown.
#' @param ... Further arguments (unused)
#' @return A matrix of class 'ep' with entry (i,j) containing the
#'  probability of an edge from node \code{i} to \code{j}
#' @S3method ep parental.contingency
#' @method ep parental.contingency
#' @seealso \code{\link{ep}}
ep.parental.contingency <- function(x, FUN, verbose = F, ...){
  stopifnot(class(x) == "parental.contingency",
            "parental.list" %in% names(x),
            "contingency" %in% names(x),
            inherits(x$parental.list, "parental.list"),
            inherits(x$contingency, c("numeric", "integer")),
            inherits(verbose, "logical"))
  tablebn <- x$parental.list
  counts <- x$contingency
  tablebn <- unname(tablebn)
  counts <- unname(counts)
  # apply FUN if supplied
  if (!missing(FUN)){
    if (verbose) cat("Applying FUN to parental.list\n")
    FUN <- match.fun(FUN)
    tablebn <- FUN(tablebn)
    stopifnot(class(tablebn) == "parental.list")
  }
  numberOfNodes <- length(tablebn[[1]])
  nodeSeq <- seq_len(numberOfNodes)
  lengthOfRuns <- sum(counts)
  if (verbose) cat("Computing ep() from parental.contingency\n")
  res <- matrix(0, ncol = numberOfNodes, nrow = numberOfNodes)
  # loop over each cell of the edge probabilities matrix
  if (verbose){
    progress <- txtProgressBar(max = numberOfNodes^2, style = 3)
    setTxtProgressBar(progress, 0)
    prog <- 0
  }
  # t <- system.time({
  # for (i in seq_along(tablebn)){
  #   for (head in nodeSeq){
  #     tails <- tablebn[[i]][[head]]
  #     res[tails, head] <- res[tails, head] + counts[i]
  #   }
  #   # if (verbose){
  #   #   prog <<- prog + 1
  #   #   setTxtProgressBar(progress, prog)
  #   # }
  # }})
  #
  # browser()
  for (head in nodeSeq){
    thishead <- lapply(tablebn, "[[", head)
    lenhead <- sapply(thishead, length)
    counthead <- rep(counts, times = lenhead)
    thishead <- unlist(thishead)
    for (tail in nodeSeq){
      res[tail, head] <- sum(counthead[thishead == tail])
      if (verbose){
        prog <<- prog + 1
        setTxtProgressBar(progress, prog)
      }
    }
  }
  if (verbose){
    close(progress)
  }
  res <- res/lengthOfRuns
  class(res) <- c("ep", "matrix")
  res
}
#' Plot top graphs.
#'
#' Plot the the top() Bayesian Networks from a posterior distribution.
#' The top graphs are those with the highest score with respect to the
#' posterior distribution, which for converged MCMC will be most commonly
#' encountered graph.
#'
#' @param x An object of class "bnpostmcmc" or "bnpost"
#' @param top Optionally provide pre-computed top(x)
#' @param ... Further arguments, passed to \code{\link[parental]{grplot}}
#'
#' @return A plot of the top graph, with their marginal likelihoods (without
#'   priors)
#' @S3method plot bnpostmcmc
#' @method plot bnpostmcmc
#' @seealso \code{\link{levelplot.bnpostmcmc}},
#'   \code{\link{bnpostmcmc}}, \code{\link{bnpost}}
plot.bnpostmcmc <- plot.bnpost <- function(x, top = NULL, ...){
  if (is.null(top)){
    top <- top(x)
  }
  lsmd <- logScoreMultDir(top, data = x$data)
  lsmd <- round(lsmd, 2)
  names(top) <- paste(seq_along(top), ": ", as.character(lsmd), sep = "")
  grplot(top, ...)
}
#' Bayes Factors.
#'
#' method description
#'
#' @param bn1 A bn
#' @param bn2 ...
#' @param data ...
#' @param ... further arguments
#'
#' @S3method bf bnpostmcmc
#' @method bf bnpostmcmc
#' @seealso \code{\link{bf}}
bf.bnpostmcmc <- bf.bnpost <- function(bn1, bn2, data, ...){
  bnl <- bn.list(bn1, bn2)
  logScoreMultDir(bnl, data, ...)
}
#' Levelplot of BN Posterior from MCMC.
#'
#' method description
#'
#' @param x ...
#'
#' @S3method levelplot bnpostmcmc
#' @method levelplot bnpostmcmc
#' @seealso \code{\link{plot.bnpostmcmc}}
levelplot.bnpostmcmc <- levelplot.bnpost <- function(x){
  stopifnot(class(x) %in% c("bnpostmcmc", "bnpost"))
  levelplot(ep(x))
}
#' Internal function.
#'
#' method description
#'
#' @param ep ...
#' @seealso \code{\link{levelplot.ep}}
prepareLevelPlot <- function(ep){
  n <- dim(ep)[1]
  if (!is.null(colnames(ep))){
    nodeSeq <- colnames(ep)
  } else {
    nodeSeq <- seq_len(n)
  }
  data <- expand.grid(head = factor(nodeSeq, levels = rev(nodeSeq)),
                      tail = factor(nodeSeq, levels = nodeSeq))
  data$edgeprob <- as.vector(as.numeric(ep))
  data
}
#' Levelplot of posterior edge probabilities.
#'
#' Plot a \code{\link[lattice]{levelplot}} displaying the edge probabilities.
#'
#' @param ep An object of class \code{ep}. A matrix of edge probabilities.
#' @S3method levelplot ep
#' @method levelplot ep
#' @seealso \code{\link{levelplot.ep.list}}, \code{\link{dotplot.ep}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' myep <- ep(exact)
#' if (require(lattice)){
#'   levelplot(myep)
#' }
levelplot.ep <- function(ep){
  data <- prepareLevelPlot(ep)
  levelplot(
    edgeprob ~ tail * head,
    data,
    ylab = "Tail",
    xlab = "Head",
    main = "Edge probabilities",
    panel = function(x, y, z, subscripts, ...){
      panel.levelplot(x, y, z, subscripts, ...)
      ltext(x, y, signif(z[subscripts], 2), col = "black")
    }
  )
}
#' Levelplot of posterior edge probabilities.
#'
#' Plot a \code{\link[lattice]{levelplot}} displaying the edge probabilities.
#'
#' @param eplist An \code{ep.list} object. A list of edge probability
#'   matrices
#' @S3method levelplot ep.list
#' @method levelplot ep.list
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' myep1 <- ep(exact)
#' myep2 <- ep(mcmc)
#' if (require(lattice)){
#'   levelplot(ep.list(Exact = myep1, MCMC = myep2))
#' }
levelplot.ep.list <- function(eplist){
  dfs <- lapply(eplist, prepareLevelPlot)
  data <- do.call("make.groups", dfs)
  levelplot(
    edgeprob ~ tail * head | which,
    data,
    ylab = "Tail",
    xlab = "Head",
    aspect = "iso",
    as.table = T,
    main = "Edge probabilities",
    panel = function(x, y, z, subscripts, ...){
      panel.levelplot(x, y, z, subscripts, ...)
      ltext(x[subscripts], y[subscripts],
            signif(z[subscripts], 2), col = "black")
    }
  )
}
#' Internal function.
#'
#' method description
#'
#' @param ep ...
shrinkep <- function(ep){
  offDiagonals <- upper.tri(ep) + lower.tri(ep) > 0
  # remove the diagonals
  ep <- ep[offDiagonals]
  # name the edge probabilities
  whichEdges <- which(offDiagonals, arr.ind = T)
  edgeNames <- apply(whichEdges, 1, paste, collapse = "->")
  names(ep) <- edgeNames
  ep
}
#' Dotplot of posterior edge probabilities.
#'
#' method description
#'
#' @param ep ...
#' @param head ...
#' @param ... Further arguments passed to method
#' @S3method dotplot ep
#' @method dotplot ep
#' @seealso \code{\link{levelplot.ep}}, \code{\link{dotplot.ep.list}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' myep <- ep(exact)
#' if (require(lattice)){
#'   dotplot(myep)
#' }
dotplot.ep <- function(ep, head = 30, ...){
  epdata <- data.frame(method = c(), ep = c(), name = c())
  epShrunk <- shrinkep(ep)
  epdata <- rbind(epdata, data.frame(
    ep = epShrunk,
    name = names(epShrunk)
  ))
  subsetTotals <- tapply(epdata[, c("ep")], epdata[, "name"], sum)
  subsetNames <- names(sort(subsetTotals, dec = T))
  # this was reorder(name) before R-check picked up on problem
  epdata$name <- with(epdata, reorder(epdata$name, ep))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  dotplot(
    name ~ ep,
    data = epdata,
    type = c("p", "g"),
    xlab = "Edge probability",
    auto.key = T,
    ylab = "Edges",
    subset = epdata$name %in% subsetNames,
    par.settings = simpleTheme(pch = 3:10),
    ...
  )
}
#' List of posterior edge probabilities.
#'
#' method description
#'
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{ep}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' myep1 <- ep(exact)
#' myep2 <- ep(mcmc)
#' ep.list(Exact = myep1, MCMC = myep2)
ep.list <- function(...){
  out <- list(...)
  class(out) <- "ep.list"
  out
}
#' Dotplot of list of posterior edge probabilities.
#'
#' method description
#'
#' @param eplist A *NAMED* list of edge probability matrices.
#' @param subsetBy ...
#' @param head ...
#' @param ... Further arguments passed to method
#' @S3method dotplot ep.list
#' @method dotplot ep.list
#' @seealso \code{\link{dotplot.ep}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' myep1 <- ep(exact)
#' myep2 <- ep(mcmc)
#' if (require(lattice)){
#'   dotplot(ep.list(Exact = myep1, MCMC = myep2))
#' }
dotplot.ep.list <- function(eplist, subsetBy = "Exact", head = 30, ...){
  epTypes <- names(eplist)
  epdataList <- lapply(seq_along(eplist), function(whichEP){
    ep <- eplist[[whichEP]]
    nm <- epTypes[[whichEP]]
    epShrunk <- shrinkep(ep)
    data.frame(
      method = nm,
      ep = epShrunk,
      name = names(epShrunk)
    )
  })
  epdata <- do.call("rbind", epdataList)
  epdata[, "method"] <- as.factor(epdata[, "method"])
  if (!subsetBy %in% epTypes){
    subsetBy <- "MCMC"
  }
  epdatasubset <- subset(epdata, epdata$method %in% subsetBy)
  subsetTotals <- tapply(epdatasubset[, c("ep")], epdatasubset[, "name"], sum)
  subsetNames <- names(sort(subsetTotals, dec = T))
  epdata$name <- with(epdata, reorder(epdata$name, ep))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  dotplot(
    name ~ ep,
    groups       = epdata$method,
    data         = epdata,
    type         = c("p", "g"),
    xlab         = "Edge probability",
    auto.key     = T,
    ylab         = "Edges",
    subset       = epdata$name %in% subsetNames,
    par.settings = simpleTheme(pch = 3:10),
    ...
  )
}
#' Internal function.
#'
#' method description
#'
#' @param gplist An \code{gp.list} object
prepareGPPlot <- function(gplist){
  if (class(gplist) == "gp.list"){
    gpdataList <- lapply(names(gplist), function(nm){
      gp <- gplist[[nm]]
      data.frame(
        method = nm,
        gp = as.vector(gp),
        name = names(gp)
      )
    })
    gpdata <- do.call("rbind", gpdataList)
    gpdata[, "method"] <- as.factor(gpdata[, "method"])
    gpdata
  }
  else {
    # otherwise gplist is not a list
    data.frame(
        gp = as.vector(gplist),
        name = names(gplist)
    )
  }
}
#' Dotplot of posterior graph probabilities.
#'
#' method description
#'
#' @param gp ...
#' @param head ...
#' @param ... Further arguments passed to method
#' @S3method dotplot gp
#' @method dotplot gp
#' @seealso \code{\link{xyplot.gp}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' mygp <- gp(exact)
#' if (require(lattice)){
#'   dotplot(mygp)
#' }
dotplot.gp <- function(gp, head = 30, ...){
  gpdata <- prepareGPPlot(gp)
  subsetTotals <- tapply(gpdata[, c("gp")], gpdata[, "name"], sum)
  subsetNames <- names(sort(subsetTotals, dec = T))
  gpdata$name <- with(gpdata, reorder(gpdata$name, gp))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  dotplot(
    name ~ gp,
    data         = gpdata,
    type         = c("p", "g"),
    xlab         = "Graph probability",
    auto.key     = T,
    ylab         = "Graphs",
    subset       = gpdata$name %in% subsetNames,
    par.settings = simpleTheme(pch = 3:10),
    ...
  )
}
#' Scatterplot of posterior graph probabilities.
#'
#' method description
#'
#' @param gp ...
#' @param head ...
#' @param ... Further arguments passed to method
#' @S3method xyplot gp
#' @method xyplot gp
#' @seealso \code{\link{dotplot.gp}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' mygp <- gp(exact)
#' if (require(lattice)){
#'   xyplot(mygp)
#' }
xyplot.gp <- function(gp, head = 30, ...){
  gpdata <- prepareGPPlot(gp)
  # compute the probability of each graph
  subsetTotals <- tapply(gpdata[, c("gp")], gpdata[, "name"], sum)
  # sort the names by decreasing order of probability
  subsetNames <- names(sort(subsetTotals, dec = T))
  # sort the name factor by graph probability
  gpdata$name <- with(gpdata, reorder(name, gp))
  # reverse order of the levels
  gpdata$name <- factor(as.character(gpdata$name),
                        levels = rev(levels(gpdata$name)))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  xyplot(
    gp ~ name,
    data         = gpdata,
    type         = "h",
    xlab         = "Graphs",
    ylab         = "Graph probability",
    scales       = list(x = list(rot = 90)),
    subset       = gpdata$name %in% subsetNames,
    par.settings = simpleTheme(pch = 3:10),
    ...
  )
}
#' List of posterior graph probabilities.
#'
#' method description
#'
#' @param ... Further arguments passed to method
#' @export
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' mygp1 <- gp(exact)
#' mygp2 <- gp(mcmc)
#' gp.list(Exact = mygp1, MCMC = mygp2)
gp.list <- function(...){
  out <- list(...)
  class(out) <- "gp.list"
  out
}
#' Dotplot of posterior graph probabilities.
#'
#' method description
#'
#' @param gplist ...
#' @param subsetBy ...
#' @param head ...
#' @param ... Further arguments passed to method
#' @S3method dotplot gp.list
#' @method dotplot gp.list
#' @seealso \code{\link{xyplot.gp.list}}, \code{\link{dotplot.gp}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' mygp1 <- gp(exact)
#' mygp2 <- gp(mcmc)
#' if (require(lattice)){
#'   dotplot(gp.list(Exact = mygp1, MCMC = mygp2))
#' }
dotplot.gp.list <- function(gplist, subsetBy = "Exact", head = 30, ...){
  gpdata <- prepareGPPlot(gplist)
  if (!"Exact" %in% gpdata[, "method"]){
    subsetBy <- names(gplist)[1]
    warning(paste("Subsetting by", subsetBy))
  }
  gpdatasubset <- subset(gpdata, gpdata$method %in% subsetBy)
  subsetTotals <- tapply(gpdatasubset[, c("gp")], gpdatasubset[, "name"], sum)
  subsetNames <- names(sort(subsetTotals, dec = T))
  gpdata$name <- with(gpdata, reorder(name, gp))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  dotplot(
    name ~ gp,
    groups       = gpdata$method,
    data         = gpdata,
    type         = c("p", "g"),
    xlab         = "Graph probability",
    auto.key     = T,
    ylab         = "Graphs",
    subset       = gpdata$name %in% subsetNames,
    par.settings = simpleTheme(pch = 3:10),
    ...
  )
}
#' Scatterplot of posterior graph probablities.
#'
#' method description
#'
#' @param gplist ...
#' @param head ...
#' @param scales ...
#' @param highlight ...
#' @param ... Further arguments passed to method
#' @S3method xyplot gp.list
#' @method xyplot gp.list
#' @seealso \code{\link{dotplot.gp.list}}, \code{\link{xyplot.gp}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#' mcmc <- posterior(data = x, "mc3", nSamples = 500, nBurnin = 100)
#'
#' mygp1 <- gp(exact)
#' mygp2 <- gp(mcmc)
#' if (require(lattice)){
#'   xyplot(gp.list(Exact = mygp1, MCMC = mygp2))
#' }
xyplot.gp.list <- function(gplist, head = 30,
                           scales = list(x = list(rot = 90)),
                           highlight = NULL, ...){
  gpdata <- prepareGPPlot(gplist)
  if (!"Exact" %in% gpdata[, "method"]){
    subsetBy <- names(gplist)[1]
    warning(paste("Subsetting by", subsetBy))
  }
  # compute the probability of each graph
  subsetTotals <- tapply(gpdata[, c("gp")], gpdata[, "name"], sum)
  # sort the names by decreasing order of probability
  subsetNames <- names(sort(subsetTotals, dec = T))
  # sort the name factor by graph probability
  gpdata$name <- with(gpdata, reorder(name, gp))
  # reverse order of the levels
  gpdata$name <- factor(as.character(gpdata$name),
                        levels = rev(levels(gpdata$name)))
  if (!is.null(head)){
    subsetNames <- subsetNames[seq_len(head)]
  }
  numberOfTypes <- length(gplist)
  typesSeq <- seq_len(numberOfTypes)
  if (is.null(highlight)){
    gpdata$highlight <- factor(rep(T, nrow(gpdata)), levels = c(T, F))
  }
  else {
    gpdata$highlight <- factor(gpdata$name %in% highlight, levels = c(T, F))
  }
  typeSeq <- seq_len(numberOfTypes)
  mysuperpose <- trellis.par.get("superpose.symbol")
  mysuperpose$col <- rep(trellis.par.get("superpose.symbol")$col[typeSeq], 2)
  mysuperpose$pch <- rep(seq_len(numberOfTypes) + 2, 2)
  mysuperpose$alpha <- rep(c(1, 0.2), each = numberOfTypes)
  xyplot(
    gp ~ name,
    data               = gpdata,
    groups             = interaction(gpdata$method, highlight),
    type               = c("p", "h"),
    xlab               = "Graphs",
    ylab               = "Graph probability",
    scales             = scales,
    subset             = gpdata$name %in% subsetNames,
    key                = list(
                           text   = list(levels(gpdata$method)),
                           points = Rows(mysuperpose, typesSeq)
                         ),
    par.settings       = list(
      superpose.symbol = mysuperpose
    ),
    ...
  )
}
#' Summary of posterior graph probabilities.
#'
#' method description
#'
#' @param object ...
#' @param ... Further arguments passed to method
#' @S3method summary gp
#' @method summary gp
#' @seealso \code{\link{gp}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' mygp <- gp(exact)
#' summary(mygp)
summary.gp <- function(object, ...){
  data.frame(
    graph = as.character(as.parental(names(object)), pretty = T),
    gp = as.numeric(object)
  )
}
#' Threshold posterior edge probabilities.
#'
#' Return the parental given by thresholding a edge probability matrix
#' at a given level. The inequality is >=. Note this may well be cyclic.
#'
#' @param ep A matrix of class ep
#' @param threshold The value at which to threshold the matrix
#' @return An object of class parental
#' @export
#' @seealso \code{\link{ep}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1))
#' x2 <- factor(c(0, 1, 0, 1))
#' x <- data.frame(x1 = x1, x2 = x2)
#'
#' exact <- posterior(data = x, "exact")
#'
#' myep <- ep(exact)
#' parentalFromEPThreshold(myep, 0.2)
#' parentalFromEPThreshold(myep, 0.4)
parentalFromEPThreshold <- function(ep, threshold){
  stopifnot("ep"              %in% class(ep),
            class(threshold)  ==   "numeric",
            length(threshold) ==   1)
  n <- dim(ep)[1]
  # threshold the edge probability table at threshold
  edgelist <- which(ep >= threshold, arr.ind = T)
  # remove self-loops
  edgelist <- edgelist[edgelist[, 1] != edgelist[, 2], , drop = F]
  # convert the resulting edgelist to parental graph list thing
  as.parental(edgelist, type = "edgelist", n)
}
#' Convert 'parental list' to CPDAGs.
#'
#' A wrapper for \code{\link[parental]{as.cpdag}} that routes around the
#' issue that the parental.list is not of class \code{bn.list} etc.
#'
#' @param x A \code{parental.list}
#' @param verbose A logical. Should a progress bar be displayed?
#' @seealso \code{\link[parental]{as.cpdag}}, the function for which this
#'   function is a wrapper.
#' @return A \code{parental.list} of CPDAGs.
#' @export
parentalToCPDAG <- function(x, verbose = T){
  stopifnot(inherits(x, "parental.list"))
  # convert to bn, otherwise as.cpdag won't work.
  bnlist <- lapply(x, function(y){
    class(y) <- c("bn", "parental")
    y
  })
  class(bnlist) <- c("bn.list", "parental.list")
  if (verbose){
    progress <- txtProgressBar(max = length(bnlist), style = 3)
    setTxtProgressBar(progress, 0)
    prog <- 0
  }
  bnlistcp <- lapply(bnlist, function(y){
    if (verbose){
      prog <<- prog + 1
      setTxtProgressBar(progress, prog)
    }
    as.cpdag(y)
  })
  if (verbose){
    close(progress)
  }
  # convert back to parental.list for ep(x, method = "tabulate")
  class(bnlistcp) <- "parental.list"
  bnlistcp
}
#' Prepare data for plotting in a ROC plot.
#'
#' Converts MCMC samples, edge probability matrices etc to a data frame
#' ready for plotting as ROC curves.
#'
#' @param x The object to be prepared for an ROC plot
#' @param ... Further arguments passed to method
#' @seealso For actual graphs, see \code{\link{as.roc.parental}} and
#'   \code{\link{as.roc.parental.list}}. For edge probability matrices see
#'   \code{\link{as.roc.ep}} and \code{\link{as.roc.ep.list}}.
#'   For plotting \code{\link{rocplot}}. The methods defined are
#'   \code{\link{as.roc.parental}}, \code{\link{as.roc.parental.list}},
#'   \code{\link{as.roc.ep}}, \code{\link{as.roc.ep.list}}
#' @return A data frame, with columns \code{estimate}, \code{tp}, and
#'   \code{fp}. The first contains the supplied label; the latter two
#'   contain the number of true and false positives respectively.
#' @export
as.roc <- function(x, ...){
  UseMethod("as.roc")
}
#' Prepare a parental for a ROC plot.
#'
#' Compares two graphs, one of which is the true graph, and computes the
#' number of true and false positives.
#'
#' @param x A \code{parental} graph to compare to \code{true}
#' @param true A \code{bn}. The true graph.
#' @param label A label
#' @param ... Further arguments (unused)
#' @seealso For lists of graphs, see \code{\link{as.roc.parental.list}}. For
#'   edge probability matrices see \code{\link{as.roc.ep}} and
#'   \code{\link{as.roc.ep.list}}. \code{\link{as.roc.parental.list}}
#' @return A 1-row data frame, with columns \code{estimate}, \code{tp}, and
#'   \code{fp}. The first contains the supplied label; the latter two
#'   contain the number of true and false positives respectively
#' @S3method as.roc parental
#' @method as.roc parental
as.roc.parental <- function(x, true, label, ...){
  tp <- pintersect(x, true, count = T)
  fp <- psetdiff(x, true, count = T)
  totalPositives <- nEdges(true)
  totalPossibleEdges <- nNodes(true) * (nNodes(true) - 1)
  totalNegatives <- totalPossibleEdges - totalPositives
  data.frame(estimate = label,
             tp       = tp,
             fp       = fp,
             tpr      = tp/totalPositives,
             fpr      = fp/totalNegatives)
}
#' Prepare a parental list for a ROC plot.
#'
#' Compares a list of graphs \code{x} to a true graph \code{true}, and
#' returns the number of true and false positives.
#'
#' @param x The object to be prepared for an ROC plot
#' @param true A \code{bn}. The true graph.
#' @param labels A vector of labels
#' @param verbose A logical. Should progress be displayed?
#' @param ... Further arguments (unused)
#' @seealso For individual graphs, see \code{\link{as.roc.parental}}. For
#'   edge probability matrices see \code{\link{as.roc.ep}} and
#'   \code{\link{as.roc.ep.list}}. \code{\link{as.roc.parental}}
#' @return A data frame, with columns \code{estimate}, \code{tp}, and
#'   \code{fp}. The first contains the supplied label; the latter two
#'   contain the number of true and false positives respectively
#' @S3method as.roc parental.list
#' @method as.roc parental.list
as.roc.parental.list <- function(x, true, labels, verbose, ...){
  xSeq <- seq_along(x)
  out <- lapply(xSeq, function(i){
    as.roc(x[[i]], true, labels[[i]])
  })
  do.call("rbind", out)
}
#' Prepare an edge probability matrix for a ROC plot.
#'
#' Compares an edge probability matrices \code{x} to a true graph
#' \code{true}, and returns the number of true and false positives.
#'
#' @param x A matrix of class \code{ep} of edge probabilities
#' @param true A \code{bn}. The true graph.
#' @param thresholds A numeric vector of thresholds.
#' @param label A label
#' @param verbose A logical. Should progress should be displayed?
#' @param ... Further arguments (unused)
#' @seealso For lists of edge probability matrices, see
#'   \code{\link{as.roc.ep.list}}. For graphs, \code{\link{as.roc.parental}}
#'   and \code{\link{as.roc.parental.list}}. \code{\link{as.roc.ep.list}}
#' @return A data frame, with columns \code{estimate}, \code{tp}, and
#'   \code{fp}. The first contains the supplied label; the latter two
#'   contain the number of true and false positives respectively. Rows
#'   correspond to the supplied thresholds.
#' @S3method as.roc ep
#' @method as.roc ep
as.roc.ep <- function(x, true, thresholds, label, verbose, ...){
  rocdata <- data.frame(estimate = c(), tp = c(), fp = c())
  for (threshold in thresholds){
    xgraph <- parentalFromEPThreshold(x, threshold)
    newdata <- as.roc(xgraph, true, label = label)
    rocdata <- rbind(rocdata, newdata)
  }
  rocdata
}
#' Prepare an list of edge probability matrix for a ROC plot.
#'
#' Compares a list of edge probability matrices \code{x} to a true graph
#' \code{true}, and returns the number of true and false positives.
#'
#' @param x A list of matrices of class \code{ep} of edge probabilities
#' @param true A \code{bn}. The true graph.
#' @param thresholds A numeric vector of thresholds.
#' @param labels A vector of labels
#' @param verbose A logical. Should progress should be displayed?
#' @param ... Further arguments (unused)
#' @seealso For individual edge probability matrices see
#'   \code{\link{as.roc.ep}}. For graphs, \code{\link{as.roc.parental}} and
#'   \code{\link{as.roc.parental.list}}. \code{\link{as.roc.ep}}
#' @return A data frame, with columns \code{estimate}, \code{tp}, and
#'   \code{fp}. The first contains the supplied label; the latter two
#'   contain the number of true and false positives respectively. Rows
#'   correspond to the supplied thresholds, for each element of the supplied
#'   \code{ep.list}.
#' @S3method as.roc ep.list
#' @method as.roc ep.list
as.roc.ep.list <- function(x, true, thresholds, labels, verbose, ...){
  xSeq <- seq_along(x)
  out <- lapply(xSeq, function(i){
    as.roc(x[[i]], true, thresholds, labels[[i]])
  })
  do.call("rbind", out)
}
#' Plot an ROC curve.
#'
#' Print a ROC curve given a variety of estimation methods.
#'
#'
#' An alternative is given by:
#' Werhli et al. Comparative evaluation of reverse engineering gene
#' regulatory networks with relevance networks, graphical gaussian models
#' and bayesian networks. Bioinformatics (2006) vol. 22 (20) pp. 2523
#'
#' @param true An object of class bn giving the true graph. This is
#'   converted to a CPDAG before comparision.
#' @param maps An object of class \code{bn.list}. An estimate of the true
#'   graph, given by the MAP estimate given by the MCMC. This is internally
#'   converted to CPDAG.
#' @param bnpmls An object of class \code{bnpostmcmc.list}. An object
#'   encapsulating a number of MCMC runs.
#' @param eps An object of class \code{ep.list}.
#' @param thresholds An object of class numeric, giving the thresholds at
#'   which to plot the graphs given by MCMC and PC-MCMC estimates
#' @param use.cpdags A logical of length 1 indicating whether DAG should be
#'   converted to CPDAGs before computing the ROC curves.
#' @param verbose A logical indicating whether to show progress bars etc.
#' @param ... Further arguments
#' @return A lattice object, containing the ROC plot
#' @export
#' @seealso \code{\link{as.roc}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1, 1, 1))
#' x2 <- factor(c(0, 1, 0, 1, 0, 1))
#' x3 <- factor(c(0, 1, 0, 0, 0, 0))
#' x <- data.frame(x1 = x1, x2 = x2, x3 = x3)
#'
#' exact <- posterior(x, "exact")
#' eppost <- ep(exact)
#' mcmc <- posterior(x, "mc3", nSamples = 1000, nBurnin = 100)
#'
#' rocplot(true = bn(integer(0), c(1,3), integer(0)),
#'         eps = ep.list(eppost))
#' rocplot(true = bn(integer(0), integer(0), 2),
#'         eps = ep.list(eppost))
rocplot <- function(true,
                    maps,
                    bnpmls,
                    eps,
                    thresholds = seq(from = 0, to = 1, by = 0.01),
                    use.cpdags = F,
                    verbose    = F,
                    ...){
  haveMAPs <- !missing(maps)
  haveBNPMLs <- !missing(bnpmls)
  haveEPs <- !missing(eps)
  stopifnot("bn" %in% class(true),
            !haveMAPs   || inherits(maps, "bn.list"),
            !haveBNPMLs || inherits(bnpmls, "bnpostmcmc.list"),
            !haveEPs    || inherits(eps, "ep.list"),
            inherits(thresholds, c("numeric", "integer")),
            inherits(use.cpdags, "logical"),
            inherits(verbose, "logical"))
  if (haveMAPs){
    mapsNames <- names(maps)
    if (is.null(mapsNames)){
      mapsNames <- paste("MAP", seq_along(maps))
    }
  }
  if (haveBNPMLs){
    bnpmlsNames <- names(bnpmls)
    if (is.null(bnpmlsNames)){
      bnpmlsNames <- paste("BN Post", seq_along(bnpmls))
    }
  }
  if (haveEPs){
    epsNames <- names(eps)
    if (is.null(epsNames)){
      epsNames <- paste("EP", seq_along(eps))
    }
  }
  if (use.cpdags){
    if (verbose) cat("Converting true to cpdag\n")
    true <- as.cpdag(true)
    if (haveMAPs){
      if (verbose) cat("Converting MAPs to cpdag\n")
      maps <- lapply(maps, as.cpdag)
    }
    if (haveBNPMLs){
      if (verbose) cat("Converting BNPMLs to cpdag\n")
      bnpmls <- ep(x       = bnpmls,
                   method  = "tabulate",
                   FUN     = parentalToCPDAG,
                   verbose = T)
    }
  } else {
    if (haveBNPMLs){
      if (verbose) cat("Tabulating BNPMLs\n")
      bnpmls <- ep(x       = bnpmls,
                   method  = "tabulate",
                   verbose = T)
    }
  }
  rocdata <- data.frame(estimate = c(), tp = c(), fp = c())
  thresholds <- rev(thresholds)
  if (haveMAPs){
    if (verbose) cat("Computing ROC for Map\n")
    rocdataMap <- as.roc(x       = maps,
                         true    = true,
                         labels  = mapsNames,
                         verbose = verbose)
    rocdata <- rbind(rocdata, rocdataMap)
  }
  if (haveBNPMLs){
    if (verbose) cat("Computing ROC for BNPMLs\n")
    rocdataBNPMLs <- as.roc(x          = bnpmls,
                            true       = true,
                            thresholds = thresholds,
                            labels     = bnpmlsNames,
                            verbose    = verbose)
    rocdata <- rbind(rocdata, rocdataBNPMLs)
  }
  if (haveEPs){
    if (verbose) cat("Computing ROC for EPs\n")
    rocdataEP <- as.roc(x          = eps,
                        true       = true,
                        thresholds = thresholds,
                        labels     = epsNames,
                        verbose    = verbose)
    rocdata <- rbind(rocdata, rocdataEP)
  }
  rocdata[, "estimate"] <- factor(rocdata[, "estimate"])
  trimName <- function(x){
    lens <- sapply(x, nchar)
    substr(x, 1, lens - 2)
  }
  typeChar <- as.character(rocdata[, "estimate"])
  rocdata[, "type"] <- trimName(typeChar)
  ll <- c("Gibbs", "M-H", "REV", "Xie", "PC", "EP", "BN Post", "MAP")
  rocdata[, "type"] <- factor(rocdata[, "type"], levels = ll)
  rocdata[, "type"] <- factor(rocdata[, "type"])
  scalesAt <- c(0, seq_len(length(true) * (length(true) - 1)))
  xyplot(tpr ~ fpr,
         data         = rocdata,
         groups       = rocdata$type,
         xlab         = "False positives",
         ylab         = "True positives",
         type         = c("s", "s", "s", "p", "p"),
         scales       = list(at = scalesAt),
         auto.key     = T,
         par.settings = simpleTheme(pch = 3:10),
         panel = panel.superpose,
         panel.groups = function(x, y, ...){
           panel.xyplot(x, y, groups = rocdata$estimate, ...)
         },
         ...)
}
#' Compute the area under an ROC curve.
#'
#' Generic function for computing the area under an ROC curve.
#'
#' @param x An appropriate object.
#' @param ... Further arguments passed to method.
#' @seealso \code{\link{auroc.ep}}, \code{\link{auroc.ep.list}}
#' @return The area under the ROC curve(s).
#' @export
auroc <- function(x, ...){
  UseMethod("auroc")
}
#' Compute the area under an ROC curve.
#'
#' Compute the area under an ROC curve for an edge probability matrix.
#'
#' @param x A matrix of class \code{ep} of edge probabilities
#' @param true A \code{bn}. The true graph.
#' @param thresholds A numeric vector of thresholds.
#' @param label A label
#' @param verbose A logical. Should progress should be displayed?
#' @param ... Further arguments (unused)
#' @seealso \code{\link{auroc}}, \code{\link{auroc.ep.list}}
#' @return The area under the ROC curve.
#' @S3method auroc ep
#' @method auroc ep
auroc.ep <- function(x, true, thresholds, label, verbose, ...){
  rocdata <- data.frame(estimate = c(), tp = c(), fp = c())
  for (threshold in thresholds){
    xgraph <- parentalFromEPThreshold(x, threshold)
    newdata <- as.roc(xgraph, true, label = label)
    rocdata <- rbind(rocdata, newdata)
  }
  # compute area under curve
  auc <- function(x, y){
    sum(diff(x) * y[-length(y)])
  }
  auc(rev(rocdata[, "fpr"]), rev(rocdata[, "tpr"]))
}
#' Compute the area under an ROC curve.
#'
#' Compute the area under an ROC curve for a list of edge probability
#' matrices.
#'
#' @param x A list of matrices of class \code{ep} of edge probabilities
#' @param true A \code{bn}. The true graph.
#' @param thresholds A numeric vector of thresholds.
#' @param labels A vector of labels
#' @param verbose A logical. Should progress should be displayed?
#' @param ... Further arguments (unused)
#' @seealso For individual edge probability matrices see
#'   \code{\link{auroc.ep}}.
#' @return A vector of areas under ROC.
#' @S3method auroc ep.list
#' @method auroc ep.list
auroc.ep.list <- function(x, true, thresholds, labels, verbose, ...){
  xSeq <- seq_along(x)
  sapply(xSeq, function(i){
    auroc(x[[i]], true, thresholds, labels[[i]])
  })
}
#' Cumulative total variation distance.
#'
#' method description
#'
#' @param exactgp ...
#' @param bnpostmcmclist ...
#' @param start ...
#' @param end ...
#' @param nbin ...
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{xyplot.cumtvd}}, \code{\link{cumtvd.list}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1, 1, 1))
#' x2 <- factor(c(0, 1, 0, 1, 0, 1))
#' x3 <- factor(c(0, 1, 0, 0, 0, 0))
#' x <- data.frame(x1 = x1, x2 = x2, x3 = x3)
#'
#' exact <- posterior(x, "exact")
#' exactgp <- gp(exact)
#' mcmc1 <- posterior(x, "mc3", nSamples = 1000, nBurnin = 100)
#' mcmc2 <- posterior(x, "mc3", nSamples = 1000, nBurnin = 100)
#'
#' tvd <- cumtvd(exactgp = exactgp,
#'               bnpostmcmclist = bnpostmcmc.list(mcmc1, mcmc2))
cumtvd <- function(exactgp, bnpostmcmclist, start = 1, end,
                   nbin = (end - start + 1)/100){
  stopifnot(class(bnpostmcmclist) == "bnpostmcmc.list")
  if (missing(end)){
    end <- length(bnpostmcmclist[[1]]$samples)
    if (missing(nbin)){
      nbin <- (end - start + 1)/100
    }
  }
  numberOfNodes <- length(bnpostmcmclist[[1]]$samples[[1]])
  numberOfRuns <- length(bnpostmcmclist)
  # heuristic for whether to use pretty printing
  pretty <- seemsPretty(names(exactgp[1]))
  # get the graph probs
  # for each graph
  # for each MCMC sample
  # for each bin
  gpl <- gp(
    bnpostmcmclist,
    start = start,
    end = end,
    nbin = nbin,
    levels = names(exactgp),
    pretty = pretty
  )
  # want to convert the gp list to a matrix
  # with individual graphs in the columns
  # with the graphs ordered according to as.table in xyplot
  convertToMatrix <- function(gp){
    matrix(unlist(gp), ncol = numberOfGraphsDiscovered, byrow = T)
  }
  numberOfGraphsDiscovered <- length(gpl[[1]][[1]])
  gpmxl <- lapply(gpl, convertToMatrix)
  # convert to cumulative probabilities
  convertToCumulativeProbabilities <- function(gpmx, nbin){
    1/(seq_len(nbin)) * apply(gpmx, 2, cumsum)
  }
  cumgpmxl <- lapply(gpmxl, convertToCumulativeProbabilities, nbin)
  # convert the exact gp to a dataframe
  # with graphs across cols
  # and with the nbin number of rows
  exactgpmx <- matrix(exactgp, ncol = length(exactgp), nrow = nbin, byrow = T)
  # compute the tvd
  # for each MCMC sample
  # for each each bin
  cumtvds <- lapply(cumgpmxl, function(cumgpmx){
    rowSums(abs(cumgpmx - exactgpmx))
  })
  # do some nice naming
  names(cumtvds) <- paste("Run", seq_len(numberOfRuns))
  # return list of probs
  out <- list(
    cumtvds = cumtvds,
    lengthOfRuns = end - (start - 1),
    nbin = nbin,
    numberOfNodes = numberOfNodes,
    numberOfRuns = numberOfRuns
  )
  class(out) <- "cumtvd"
  out
}
#' Plot cumulative total variation distance.
#'
#' method description
#'
#' @param cumtvd ...
#' @S3method xyplot cumtvd
#' @method xyplot cumtvd
#' @seealso \code{\link{cumtvd}}
#' @examples
#' x1 <- factor(c(1, 1, 0, 1, 1, 1))
#' x2 <- factor(c(0, 1, 0, 1, 0, 1))
#' x3 <- factor(c(0, 1, 0, 0, 0, 0))
#' x <- data.frame(x1 = x1, x2 = x2, x3 = x3)
#'
#' exact <- posterior(x, "exact")
#' exactgp <- gp(exact)
#' mcmc1 <- posterior(x, "mc3", nSamples = 1000, nBurnin = 100)
#' mcmc2 <- posterior(x, "mc3", nSamples = 1000, nBurnin = 100)
#'
#' tvd <- cumtvd(exactgp = exactgp,
#'               bnpostmcmclist = bnpostmcmc.list(mcmc1, mcmc2))
#' if (require(lattice)){
#'   xyplot(tvd)
#' }
xyplot.cumtvd <- function(cumtvd){
  # stack the cumtvd to a dataframe
  # with a column 'ind'
  # denoting which MCMC sample it orginated from
  data <- stack(cumtvd$cumtvd)
  # index along the bins
  data[[".index"]] <- seq_len(cumtvd$nbin)
  xyplot(
    values ~ .index,
    data,
    panel = panel.superpose,
    groups = data$ind,
    type = "l",
    main = paste(
      "Total variation distance through time, with",
      cumtvd$lengthOfRuns,
      "samples in",
      cumtvd$numberOfRuns,
      "runs"
    ),
    auto.key = T,
    ylab = "Total variation distance",
    xlab = "Samples",
    scales = list(x = list(draw = F))
  )
}
#' List of cumulative total variation distance.
#'
#' method description
#'
#' @param ... Further arguments passed to method
#' @export
#' @seealso \code{\link{cumtvd}}
cumtvd.list <- function(...){
  cumtvdlist <- list(...)
  class(cumtvdlist) <- c("cumtvd.list")
  cumtvdlist
}
#' Plot of cumulative total variation distance.
#'
#' method description
#'
#' @param cumtvdlist ...
#' @S3method xyplot cumtvd.list
#' @method xyplot cumtvd.list
#' @seealso \code{\link{xyplot.cumtvd.list}}
xyplot.cumtvd.list <- function(cumtvdlist){
  cumtvdl <- lapply(cumtvdlist, function(cumtvd){
    # stack the cumtvd to a dataframe
    # with a column 'ind'
    # denoting which MCMC sample it orginated from
    stack(cumtvd$cumtvd)
  })
  names(cumtvdl) <- names(cumtvdlist)
  data <- do.call("make.groups", cumtvdl)
  # index along the bins
  data[[".index"]] <- seq_len(cumtvdlist[[1]]$nbin)
  xyplot(
    values ~ .index | which,
    data,
    groups = data$ind,
    type = "l",
    main = paste(
      "Total variation distance through time, with (for type 1)",
      cumtvdlist[[1]]$lengthOfRuns,
      "samples in",
      cumtvdlist[[1]]$numberOfRuns,
      "runs"
    ),
    auto.key = T,
    ylab = "Total variation distance",
    xlab = "Samples",
    scales = list(x = list(draw = F))
  )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.