#' Bayesian network structure learning
#'
#' Wrapper function for executing general structure learning algorithms, much like \code{\link[bnlearn]{rsmax2}} from the \code{\link[bnlearn]{bnlearn}} package, with the addition of the partitioned PC (pPC) algorithm, the \eqn{p}-value adjacency thresholding (PATH) algorithm, and the hybrid greedy initialization (HGI) algorithm.
#'
#' @param x a data frame containing the variables in the model. Currently, the implementations of pPC, PATH, and HGI only support discrete data.
#' @param restrict an argument as in \code{\link[bnlearn]{rsmax2}}, with the following additional options.
#' \itemize{
#' \item \code{restrict = "ppc"} for the pPC algorithm (\code{\link{ppc}}).
#' \item \code{restrict = "true"} with \code{true_bn} supplied to perfectly restrict to the true skeleton.
#' \item \code{restrict = "cig"} with \code{true_bn} supplied to perfectly restrict to the conditional independence graph (CIG).
#' \item \code{restrict = ""} for no restriction method (i.e., for score-based methods).
#' }
#' @param maximize an argument as in \code{\link[bnlearn]{rsmax2}}, with the additional option of \code{maximize = ""} for no maximization method (i.e., for constraint-based methods).
#' @param restrict.args an argument as in \code{\link[bnlearn]{rsmax2}}, with the addition of the following arguments only applicable to \code{restrict = "ppc"} (\code{\link{ppc}}).
#' \itemize{
#' \item \code{max_groups = 20}: a numeric value indicating maximum number of groups to partition into. The PC(-stable) algorithm may be recovered with \code{max_groups = 1}, allowing for the \code{sort_pval} argument.
#' \item \code{sort_pval = TRUE}: a logical value indicating whether or not to sort the order of the consideration of conditioning sets by the current p-values.
#' \item \code{max_wthn_sx}: a numeric value indicating the maximum size of considered conditioning sets when estimating edges within clusters.
#' \item \code{max_btwn_sx}: a numeric value indicating the maximum size of considered conditioning sets when estimating edges between clusters.
#' \item \code{max_btwn_nbr}: a numeric value indicating the maximum neighborhood size when estimating edges between clusters.
#' \item \code{maxp}: a numeric value indicating the maximum number of parents for a node, as in \code{\link[bnlearn]{hc}} and \code{\link[bnlearn]{tabu}}. Useful when \code{hgi = TRUE}. Should be less than or equal to \code{maximize.args$maxp}.
#' }
#' Additionally, \code{\link{ppc}} only uses \code{test = "mi"} for the clustering step, though subsequent stages of the algorithm can employ different tests.
#' @param maximize.args an argument as in \code{\link[bnlearn]{rsmax2}}, with the \code{maxp} argument likewise used constraint-based edge orientation, if applicable. Additionally, the \code{score} argument determines the score used in PATH (\code{score = "pred-loglik"} not supported), and HGI exclusively uses \code{score = "bic"}.
#' @param undirected a logical value indicating, for constraint-based algorithms, whether the skeleton should be output without learning edge orientations. Not applicable for hybrid methods, score-based methods, or when \code{path} or \code{hgi} are activated.
#' @param path a numeric value indicating the number of solution(s) to be generated by the PATH algorithm. By default, PATH remains deactivated with \code{path = 1}. Only applicable for \code{restrict = "ppc"}.
#' @param min_alpha a numeric value between \code{0} and \code{restrict.args$alpha} that indicates the minimum threshold value for the PATH algorithm.
#' @param hgi a logical value activating the HGI algorithm for greedy edge orientation. Not applicable if \code{restrict = ""}.
#' @param true_bn a \code{bn} object with the true underlying structure of \code{x} to evaluate d-separation tests instead of conditional independence tests. Only applicable for \code{restrict = "ppc"} or \code{restrict = "true"}.
#' @param whitelist,blacklist,debug arguments as in \code{\link[bnlearn]{rsmax2}}.
#' @details None.
#' @return A Bayesian network as an object of class \code{bn}.
#' @author Jireh Huang (\email{jirehhuang@@ucla.edu})
#' @references \href{https://doi.org/10.1007/s10994-022-06145-4}{Huang, J., & Zhou, Q. (2022). Partitioned hybrid learning of Bayesian network structures. \emph{Machine Learning}. https://doi.org/10.1007/s10994-022-06145-4}
#' @seealso \code{\link{ppc}}, \code{\link{phgs}}
#' @examples
#' ## Read Bayesian network object
#' true_bn <- bnrepository("child")
#'
#' ## Generate data and relevel for simplicity
#' set.seed(1)
#' x <- bnlearn::rbn(true_bn, n = 1e4)
#' x <- as.data.frame(sapply(x, function(x) as.factor(as.integer(x) - 1L)),
#' stringsAsFactors = TRUE)
#'
#' ## pPC with PATH
#' bn1 <- bnsl(x = x, restrict = "ppc", maximize = "",
#' restrict.args = list(alpha = 1e-3, max.sx = 3, sort_pval = TRUE),
#' maximize.args = list(maxp = 8), path = 10, min_alpha = 1e-5,
#' hgi = FALSE, debug = TRUE)
#'
#' ## pHGS (pPC with PATH, HGI, and greedy search)
#' bn2 <- bnsl(x = x, restrict = "ppc", maximize = "tabu",
#' restrict.args = list(alpha = 1e-3, max.sx = 3, sort_pval = TRUE),
#' maximize.args = list(maxp = 8, tabu = 10, max.tabu = 10),
#' path = 10, min_alpha = 1e-5, hgi = TRUE, debug = TRUE)
#'
#' ## MMHC with HGI
#' bn3 <- bnsl(x = x, restrict = "mmpc", maximize = "hc",
#' restrict.args = list(alpha = 1e-3, max.sx = 3, sort_pval = TRUE),
#' maximize.args = list(maxp = 8), hgi = TRUE, debug = TRUE)
#' @export
bnsl <- function(x, restrict = "ppc", maximize = "tabu",
restrict.args = list(), maximize.args = list(),
undirected = FALSE, path = 1, min_alpha = 1e-5,
hgi = FALSE, true_bn = NULL, whitelist = NULL,
blacklist = NULL, debug = FALSE)
{
times <- tests <-
c(restrict = 0, orient = 0, maximize = 0)
nodes <- names(x)
if (!(restrict <- ifelse(is.null(restrict), "", restrict)) %in% c("ppc", "true",
"cig", "")){
bnlearn:::check.learning.algorithm(restrict, class = c("constraint"))
}
if (!(maximize <- ifelse(is.null(maximize), "", maximize)) %in% c(""
)){
bnlearn:::check.learning.algorithm(maximize, class = "score")
}
bnlearn:::check.logical(hgi)
debug_cli(!is.numeric(path) || (path %% 1 != 0) || is.infinite(path),
cli::cli_abort, "path must be a positive numeric integer")
if (restrict == "" && maximize == "")
debug_cli(TRUE, cli::cli_abort,
"no method given for both restrict and maximize")
else if ((restrict == "mmpc") && (maximize == "hc"))
method <- "mmhc"
else if ((restrict == "hpc") && (maximize == "hc"))
method <- "h2pc"
else
method <- sprintf("%s%s", restrict, maximize)
method <- sprintf("%s%s", method, ifelse(hgi, "-hgi", ""))
method <- ifelse(method %in% c("ppchc-hgi",
"ppctabu-hgi"), "phgs", method)
algo <- gsub("-.*", "", method)
if (restrict == ""){
if (path > 1){
debug_cli(TRUE, cli::cli_alert_warning,
"deactivating path because no restrict method supplied")
path <- 1
}
if (hgi){
debug_cli(TRUE, cli::cli_alert_warning,
"deactivating hgi because no restrict method supplied")
hgi <- FALSE
}
}
else if (maximize == ""){
if (restrict %in% c("ppc", "true", "cig")){
algo <- "pc.stable"
}
}
else if (!method %in% bnlearn:::hybrid.algorithms){
algo <- "rsmax2"
}
if (path > 1 && (restrict != "ppc" ||
!is.null(true_bn))){
debug_cli(TRUE, cli::cli_alert_warning,
c("path can only be used for finite-sample executions of ppc; path deactivated"))
path <- 1
}
min_alpha <- bnlearn:::check.alpha(min_alpha)
bnlearn:::check.logical(undirected)
undirected <- (undirected || restrict == "") &&
!(path > 1) && (!hgi) # not undirected if path or hgi
if (!is.null(true_bn)){
if (any(class(true_bn) == "bn.fit")){
arcs <- bnlearn::arcs(true_bn)
true_bn <- bnlearn::empty.graph(nodes = bnlearn::nodes(true_bn))
bnlearn::arcs(true_bn) <- arcs
}
bnlearn:::check.bn(true_bn)
}
debug_cli(debug, cli::cli_alert_info,
c("structure learning with restrict = {restrict}, ",
"path = {path}, hgi = {hgi}, maximize = {maximize}"))
rst <- NULL
if (restrict %in% c(bnlearn:::constraint.based.algorithms, "ppc", "true", "cig")) {
start_time <- Sys.time()
critical.arguments <- c("x", "method", "whitelist",
"blacklist", "true_bn", "debug")
named.arguments <- names(formals(skeleton))
named.arguments <- setdiff(named.arguments, critical.arguments)
other.arguments <- setdiff(names(restrict.args), named.arguments)
bnlearn:::check.unused.args(other.arguments, character(0))
restrict.args[critical.arguments] <- list(x, method = restrict, whitelist = whitelist,
blacklist = blacklist, true_bn = true_bn, debug = debug)
rst <- do.call("skeleton", restrict.args)
end_time <- Sys.time()
times[1] <- as.numeric(end_time - start_time, unit = "secs")
tests[1] <- sum(attr(rst, "learning")$ntests)
}
if (undirected){
## no path or hgi
debug_cli(all(c(restrict, maximize) == ""), cli::cli_abort,
"valid constraint-based algorithm must be supplied if undirected graph desired")
arcs <- bnlearn:::nbr2arcs(rst)
learning <- c(attr(rst, "learning"),
list(algo = method, undirected = undirected,
illegal = bnlearn:::check.arcs.against.assumptions(NULL, x,
attr(rst, "learning")$test)))
res <- list(learning = learning,
nodes = bnlearn:::cache.structure(names(x), arcs = arcs), arcs = arcs)
}
else if (restrict != ""){ ## TODO: check if necessary
start_time <- Sys.time()
## orient, with or without path and with or without hgi
restrict.args[c("whitelist", "blacklist")] <- attr(rst, "learning")[c("whitelist",
"blacklist")]
args <- c(restrict.args,
list(local.structure = rst, score = maximize.args$score,
path = path, min_alpha = min_alpha, hgi = hgi,
complete = attr(rst, "learning")$complete))
if (is.null(args$maxp))
args$maxp <- maximize.args$maxp
else if (!is.null(maximize.args$maxp))
args$maxp <- min(args$maxp, maximize.args$maxp)
res <- do.call("orient", args[names(args) %in% names(formals(orient))])
# res$learning["blacklist"] = list(blacklist) # TODO: check if necessary
end_time <- Sys.time()
times[2] <- as.numeric(end_time - start_time, unit = "secs")
tests[2] <- res$learning$tests
}
if (maximize == ""){
res <- structure(res, class = "bn")
}
else{
start_time <- Sys.time()
if (restrict != ""){
## restricted greedy search
start <- bnlearn::empty.graph(nodes = nodes)
bnlearn::arcs(start) <- res$arcs
bnlearn::arcs(start) <- bnlearn::directed.arcs(start)
constraints <- bnlearn:::arcs.to.be.added(amat = res$arcs, nodes = nodes,
whitelist = res$learning$blacklist)
}
else{
constraints <- start <- NULL
}
critical.arguments <- c("x", "start", "heuristic",
"whitelist", "blacklist", "debug")
named.arguments <- names(formals(bnlearn:::greedy.search))
named.arguments <- setdiff(named.arguments, critical.arguments)
bnlearn:::check.unused.args(intersect(critical.arguments, names(maximize.args)),
character(0))
if (any(bnlearn:::which.undirected(whitelist, nodes = nodes)))
whitelist <- bnlearn:::cpdag.arc.extension(whitelist, nodes = nodes)
maximize.args[critical.arguments] <- list(x, start = start, heuristic = maximize,
whitelist = whitelist, blacklist = constraints,
debug = debug >= 3)
rst <- res[c("learning", "nodes", "arcs")]
res <- do.call(bnlearn:::greedy.search, maximize.args)
## rst args
res$learning$args[c("alpha", "min_alpha",
"best_alpha")] <- rst$learning$args[c("alpha", "min_alpha",
"best_alpha")]
## rst
res$learning[c("rstest", "max.sx", "group", "path",
"hgi", "whitelist", "blacklist", "arcs1")] <-
rst$learning[c("test", "max.sx", "group", "path",
"hgi", "whitelist", "blacklist", "arcs1")]
end_time <- Sys.time()
times[3] <- as.numeric(end_time - start_time, unit = "secs")
tests[3] <- res$learning$ntests -
(hgi * length(nodes)) # reference scores already computed in hgi
debug_cli(debug, cli::cli_alert_success,
c("completed greedy search with {maximize} ",
"in {prettyunits::pretty_sec(times[3])} with {tests[3]} calls"))
}
## other
res$learning$maxscore <- res$learning$test
res$learning[c("ntests", "algo", "method",
"undirected", "restrict", "maximize")] <- list(sum(tests), algo, method,
undirected, restrict, maximize)
res$learning[c("times", "tests")] <- list(times, tests)
debug_cli(debug, cli::cli_alert_success,
c("completed structure learning ",
"in {prettyunits::pretty_sec(sum(times))} with {sum(tests)} calls"))
return(invisible(res))
}
#' The partitioned PC (pPC) algorithm
#'
#' Function for executing the partitioned PC (pPC) algorithm, including the option to use the \eqn{p}-value adjacency thresholding (PATH) algorithm.
#'
#' @param x,undirected,alpha,max.sx,maxp,path,min_alpha,hgi,max_wthn_sx,max_btwn_sx,max_btwn_nbr,sort_pval,max_groups,true_bn,cluster,whitelist,blacklist,debug see \code{\link{bnsl}} for a description of arguments.
#' @return A Bayesian network as an object of class \code{bn}.
#' @author Jireh Huang (\email{jirehhuang@@ucla.edu})
#' #' @references \href{https://doi.org/10.1007/s10994-022-06145-4}{Huang, J., & Zhou, Q. (2022). Partitioned hybrid learning of Bayesian network structures. \emph{Machine Learning}. https://doi.org/10.1007/s10994-022-06145-4}
#' @seealso \code{\link{bnsl}}, \code{\link{phgs}}
#' @examples
#' ## Read Bayesian network object
#' true_bn <- bnrepository("child")
#'
#' ## Generate data and relevel for simplicity
#' set.seed(1)
#' x <- bnlearn::rbn(true_bn, n = 1e4)
#' x <- as.data.frame(sapply(x, function(x) as.factor(as.integer(x) - 1L)),
#' stringsAsFactors = TRUE)
#'
#' ## pPC with PATH
#' bn1 <- ppc(x = x, alpha = 1e-2, max.sx = 3, maxp = 8, path = 10,
#' min_alpha = 1e-5, sort_pval = TRUE, max_groups = 20,
#' debug = TRUE)
#'
#' ## PC(-stable) with PATH
#' bn2 <- ppc(x = x, alpha = 1e-2, max.sx = 3, maxp = 8, path = 10,
#' min_alpha = 1e-5, sort_pval = TRUE, max_groups = 1,
#' debug = TRUE)
#'
#' ## pPC with PATH and HGI (no greedy search)
#' bn3 <- ppc(x = x, alpha = 1e-2, max.sx = 3, maxp = 8, path = 10,
#' min_alpha = 1e-5, hgi = TRUE, sort_pval = TRUE,
#' max_groups = 20, debug = TRUE)
#' @export
ppc <- function(x, undirected = FALSE, alpha = NULL, max.sx = NULL,
maxp = NULL, path = 1, min_alpha = 1e-5, hgi = FALSE,
max_wthn_sx = max.sx, max_btwn_sx = max.sx, max_btwn_nbr = ncol(x)-2,
sort_pval = TRUE, max_groups = 20, true_bn = NULL,
cluster = NULL, whitelist = NULL, blacklist = NULL, debug = FALSE){
restrict.args <- list(cluster = cluster, test = "mi", alpha = alpha,
max.sx = max.sx, max_wthn_sx = max_wthn_sx,
max_btwn_sx = max_btwn_sx, max_btwn_nbr = max_btwn_nbr,
sort_pval = sort_pval, max_groups = max_groups)
maximize.args <- list(maxp = maxp)
bnsl(x = x, restrict = "ppc", maximize = "",
restrict.args = restrict.args, maximize.args = maximize.args,
undirected = undirected, path = path, min_alpha = min_alpha,
hgi = hgi, true_bn = true_bn, whitelist = whitelist,
blacklist = blacklist, debug = debug)
}
#' The partitioned hybrid greedy search (pHGS) algorithm
#'
#' Function for executing the partitioned hybrid greedy search (pHGS) algorithm, a combination of the partitioned PC (pPC) algorithm, the \eqn{p}-value adjacency thresholding (PATH) algorithm, and the hybrid greedy initialization (HGI) algorithm.
#'
#' @param x,maximize,maximize.args,alpha,max.sx,maxp,path,min_alpha,max_wthn_sx,max_btwn_sx,max_btwn_nbr,sort_pval,max_groups,true_bn,cluster,whitelist,blacklist,debug see \code{\link{bnsl}} for a description of arguments.
#' @details See \code{\link{bnsl}}.
#' @return A Bayesian network as an object of class \code{bn}.
#' @author Jireh Huang (\email{jirehhuang@@ucla.edu})
#' #' @references \href{https://doi.org/10.1007/s10994-022-06145-4}{Huang, J., & Zhou, Q. (2022). Partitioned hybrid learning of Bayesian network structures. \emph{Machine Learning}. https://doi.org/10.1007/s10994-022-06145-4}
#' @seealso \code{\link{bnsl}}, \code{\link{ppc}}
#' @examples
#' ## Read Bayesian network object
#' true_bn <- bnrepository("child")
#'
#' ## Generate data and relevel for simplicity
#' set.seed(1)
#' x <- bnlearn::rbn(true_bn, n = 1e4)
#' x <- as.data.frame(sapply(x, function(x) as.factor(as.integer(x) - 1L)),
#' stringsAsFactors = TRUE)
#'
#' ## pHGS (pPC with PATH and HGI) with hill-climbing
#' bn1 <- phgs(x = x, maximize = "hc",
#' maximize.args = list(maxp = 8, restart = 10, perturb = 10),
#' alpha = 1e-2, max.sx = 3, maxp = 8, path = 10,
#' min_alpha = 1e-5, sort_pval = TRUE, max_groups = 20,
#' debug = TRUE)
#'
#' ## pHGS (pPC with PATH and HGI) with tabu search
#' bn2 <- phgs(x = x, maximize = "tabu",
#' maximize.args = list(maxp = 8, tabu = 10, max.tabu = 10),
#' alpha = 1e-2, max.sx = 3, maxp = 8, path = 10,
#' min_alpha = 1e-5, sort_pval = TRUE, max_groups = 20,
#' debug = TRUE)
#' @export
phgs <- function(x, maximize = "tabu", maximize.args = NULL, alpha = NULL,
max.sx = NULL, maxp = NULL, path = 1, min_alpha = 1e-5,
max_wthn_sx = max.sx, max_btwn_sx = max.sx, max_btwn_nbr = ncol(x)-2,
sort_pval = TRUE, max_groups = 20, true_bn = NULL,
cluster = NULL, whitelist = NULL, blacklist = NULL, debug = FALSE){
restrict.args <- list(cluster = cluster, test = "mi", alpha = alpha,
max.sx = max.sx, max_wthn_sx = max_wthn_sx,
max_btwn_sx = max_btwn_sx, max_btwn_nbr = max_btwn_nbr,
sort_pval = sort_pval, max_groups = max_groups)
if (!is.null(maxp) && is.null(maximize.args$maxp))
maximize.args$maxp <- maxp
bnsl(x = x, restrict = "ppc", maximize = maximize,
restrict.args = restrict.args, maximize.args = maximize.args,
undirected = FALSE, path = path, min_alpha = min_alpha,
hgi = TRUE, true_bn = true_bn, whitelist = whitelist,
blacklist = blacklist, debug = debug)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.