Nothing
### ----------------------------------------------------------------------------
### This file is part of boolean3
###
### Copyright (C) 2011--2014 Jason W. Morgan <morgan.746@osu.edu>
###
### boolean3 represents a substantial re-write of the original boolean package
### developed by Bear Braumoeller, Ben Goodrich, and Jacob Kline. This version
### was developed under the direction of Bear Braumoeller and with support from
### The Ohio State University's College of Social and Behavioral Sciences.
###
### boolean3 is free software: you can redistribute it and/or modify it under
### the terms of the GNU General Public License as published by the Free
### Software Foundation, either version 3 of the License, or (at your option)
### any later version.
###
### This program is distributed in the hope that it will be useful, but WITHOUT
### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
### FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
### more details.
###
### You should have received a copy of the GNU General Public License along with
### this program. If not, see <http://www.gnu.org/licenses/>.
###
### ----------------------------------------------------------------------------
##' Construct a boolean object that can then be fit with \code{boolean}.
##'
##' \code{boolprep} sets up a boolean model object that can then be fit with
##' \code{\link{boolean}}. A properly specified model (contained in \code{...})
##' will contain at least three components. The first component must specify the
##' boolean logic to be employed. For instance, the \code{y ~ (a | b)} formula
##' would indicate a logical \code{or} between the \code{a} and \code{b}
##' submodels, while \code{y ~ (a & b)} would indicate a logical
##' \code{and}. \code{y} is the name of the response variable of
##' interest. Logical operators can be nested; e.g., \code{y ~ (a | (b & c))} is
##' valid. The second and third components are submodels and are specified as
##' usual: \code{a ~ x1 + x2} and \code{b ~ x3 + x4 + x5}, where are the
##' \code{x}-variables are covariates. \code{a}, \code{b}, and \code{c} are
##' labels indicating the submodel position in the boolean specification.
##' @title Specify Boolean Model
##' @param ... formula specification for boolean model. See the details and
##' examples sections below.
##' @param data \code{data.frame} containing the data to be used in the model.
##' @param subset select subset of data. See \code{\link{lm}} for details.
##' @param weights specify model weights (not implemented).
##' @param na.action set \code{na.action}. See \code{\link{lm}} for details.
##' @param offset specify an offset. Offsets are not implemented and this
##' parameter is simply ignored.
##' @param family a model \code{family} to use for estimation. This can be
##' comprised of a list of link functions when the desire is to have model
##' components with different links. \code{binomial} is the only family
##' supported at this time. See \code{\link{family}} for more details.
##' @return \code{boolprep} returns a \code{\link{boolean-class}} object
##' containing the model components needed to estimate a boolean model.
##' @author Jason W. Morgan (\email{morgan.746@@osu.edu})
##' @references Braumoeller, Bear F. (2003) ``Causal Complexity and the Study
##' of Politics.'' \emph{Political Analysis} 11(3): 209--233.
##' @seealso See \code{\link{boolean}} for model estimation.
##' @examples
##' ## Generate some fake data
##' require(mvtnorm)
##' set.seed(12345)
##' N <- 2000
##' Df <- cbind(1, rmvnorm(N, mean=rep(0, 5)))
##'
##' ## Set coefficients
##' beta.a <- c(-2.00, 0.33, 0.66, 1.00)
##' beta.b <- c(0.00, 1.50, -0.25)
##'
##' ## Generate path probabilities following a normal model.
##' y.a <- as.vector(pnorm(tcrossprod(beta.a, Df[, 1:4])))
##' y.b <- as.vector(pnorm(tcrossprod(beta.b, Df[, c(1, 5, 6)])))
##'
##' ## AND and OR-model
##' or <- function(x, y) { x + y - x * y }
##' and <- function(x, y) { x * y }
##' y.star.OR <- or(y.a, y.b)
##' y.star.AND <- and(y.a, y.b)
##'
##' ## Observed responses
##' y.OR <- rbinom(N, 1, y.star.OR)
##' y.AND <- rbinom(N, 1, y.star.AND)
##'
##' ## Set up data.frame for estimation
##' Df <- cbind(1, Df)
##' Df <- as.data.frame(Df)
##' Df[,1] <- y.OR
##' Df[,2] <- y.AND
##' names(Df) <- c("y.OR", "y.AND", "x1", "x2", "x3", "x4", "x5")
##'
##' ## Before estimating, boolean models need to be specified using the
##' ## boolprep function.
##'
##' ## OR model, specified to use a probit link for each causal path. This
##' ## matches the data generating process above.
##' mod.OR <- boolprep(y.OR ~ (a | b), a ~ x1 + x2 + x3, b ~ x4 + x5,
##' data = Df, family=list(binomial("probit")))
##' @export
boolprep <- function(..., data, subset, weights, na.action, offset,
family=list(binomial(link="logit"))) {
call <- match.call(expand.dots=TRUE) # Get model call
frame <- build_frame(call) # Build the model frame
model <- build_model(call) # Build model specification
links <- make.links(family, model) # Make link function(s)
## Set up object and return.
obj <-
structure(list
(call = call,
link = links,
model = model,
N = nrow(frame$frame[[1]]), # Observations
k = sum(sapply(frame$frame, ncol)), # Number of coefficients
coef.labels = set_labels(frame$frame), # Coefficient labels
coef.idx = set_coef(frame$frame), # Coefficient index
response = frame$response, # Extract response
frame = frame$frame), # Model frame
class="boolean")
## Set up scobit lambda (shape) parameter(s) if necessary. coef.idx and k
## need updated if this is the case. CURRENTLY NOT AVAILABLE.
if ("scobit" %in% mapply(function(x) x$name, links)) {
## obj <- scobit_setup(obj)
stop("scobit link function is not supported.")
}
## Build calc_lik closure.
obj$calc_lik <- make_lik(obj)
obj
}
##' Performs a fit of a boolean model as specified by \code{\link{boolprep}}.
##'
##' \code{boolean} performs a fit of a boolean model as specified by
##' \code{\link{boolprep}}.
##' @title Fit a Boolean Model
##' @param obj boolean model object as produced by \code{\link{boolprep}}.
##' @param method string (or string vector) specifying the method(s) of
##' estimation. The specified method(s) should be one of those available from
##' the \code{\link{optimx}} or \code{\link{optim}} functions. A genetic
##' algorithm is available from \code{\link{genoud}}
##' (\code{method="genoud"}). \code{method} defaults to \code{"nlminb"}.
##' @param start numeric vector specifying starting values for each parameter in
##' the model. Must have a length equal to the number of parameters being
##' estimated. Defaults to \code{NULL}, which instructs \code{boolean} to
##' estimate ``sensible'' starting values (currently the coefficient values
##' estimated from a \code{\link{glm}} model).
##' @param ... additional arguments to be passed on to optimizers. Each
##' optimizer provides numerous optional parameters to help improve estimation
##' results. See the provided examples and the documentation for the estimation
##' method of interest.
##' @return A boolean model object containing the fit results (detailed results
##' available in the \code{model.fit} slot).
##' @author Jason W. Morgan (\email{morgan.746@@osu.edu})
##' @references Braumoeller, Bear F. (2003) ``Causal Complexity and the Study of
##' Politics.'' \emph{Political Analysis} 11(3): 209--233.
##' @seealso See \code{\link{boolprep}} for model setup, \code{\link{boolean}}
##' the \code{snow} package for details on clustering (useful when using
##' \code{\link{genoud}}). See \code{\link{optimx}}, \code{\link{optim}}, and
##' \code{\link{genoud}} for detailed documentation on the estimation methods
##' available.
##' @examples
##' ## Generate some fake data
##' require(mvtnorm)
##' set.seed(12345)
##' N <- 2000
##' Df <- cbind(1, rmvnorm(N, mean=rep(0, 5)))
##'
##' ## Set coefficients
##' beta.a <- c(-2.00, 0.33, 0.66, 1.00)
##' beta.b <- c(0.00, 1.50, -0.25)
##'
##' ## Generate path probabilities following a normal model.
##' y.a <- as.vector(pnorm(tcrossprod(beta.a, Df[, 1:4])))
##' y.b <- as.vector(pnorm(tcrossprod(beta.b, Df[, c(1, 5, 6)])))
##'
##' ## AND and OR-model
##' or <- function(x, y) { x + y - x * y }
##' and <- function(x, y) { x * y }
##' y.star.OR <- or(y.a, y.b)
##' y.star.AND <- and(y.a, y.b)
##'
##' ## Observed responses
##' y.OR <- rbinom(N, 1, y.star.OR)
##' y.AND <- rbinom(N, 1, y.star.AND)
##'
##' ## Set up data.frame for estimation
##' Df <- cbind(1, Df)
##' Df <- as.data.frame(Df)
##' Df[,1] <- y.OR
##' Df[,2] <- y.AND
##' names(Df) <- c("y.OR", "y.AND", "x1", "x2", "x3", "x4", "x5")
##'
##' ## Before estimating, boolean models need to be specified using the
##' ## boolprep function.
##'
##' ## OR model, specified to use a probit link for each causal path. This
##' ## matches the data generating process above.
##' mod.OR <- boolprep(y.OR ~ (a | b), a ~ x1 + x2 + x3, b ~ x4 + x5,
##' data = Df, family=list(binomial("probit")))
##'
##' ## Fit a model using the nlminb optimizer (the default). Verbose output is
##' ## requested by specifying trace=1 as a control parameter.
##' (fit <- boolean(mod.OR, method="nlminb", control=list(trace=1)))
##'
##' ## Multiple optimizers can be specified in a single call to boolean. Here
##' ## we fit with the nlm and nlminb optimizers.
##' (fit1 <- boolean(mod.OR, method=c("nlm", "nlminb")))
##'
##' ## The summary function will report the detailed results for each
##' ## optimization method.
##' summary(fit1)
##' @export
boolean <- function(obj, method="nlminb", start=NULL, ...) {
## Verify that obj is a boolean object.
if (!inherits(obj, "boolean"))
stop("boolean requires a boolean object as outputed by boolprep", .call=FALSE)
## Assign the estimation method if it doesn't exist.
if(is.null(obj$method))
obj$method <- method
## Set the starting values if the are not provided.
if (is.null(start))
start <- set_start(obj)
else if (length(start) != obj$k)
stop("wrong number of starting values specified", .call=FALSE)
## Fit the model given the selected optimization method.
if (length(obj$method) == 1) {
fit <- switch(as.character(obj$method),
"genoud"= fit_genoud(obj, start=start, ...),
## "mcmc" = fit_mcmc(obj, start=start, ...),
"SANN" = fit_optim(obj, start=start, ...),
fit_optimx(obj, start=start, ...))
}
else {
fit <- fit_optimx(obj, start=start, ...)
}
## Set the class of the fit object, add it to the boolean object, and return.
class(fit) <- c("boolfit", class(fit))
obj$model.fit <- fit
obj
}
### ----------------------------------------------------------------------------
### Utilities
### ----------------------------------------------------------------------------
make.links <- function(family, model) {
## Make link function(s). Family must be a list of length 1 or equal to the
## number of submodels. If the latter is the case, the family list must have
## the same item names as in model$submods.
## All in family have to be of the "family" class.
if (!(all(Map(class, family) == "family")))
stop("all elements of 'family' must be of a valid 'family' class", .call=FALSE)
## Family has to be of length 1 or the same length as model$submods.
if (! (length(family) %in% c(1, length(model$submods))))
stop("incorrect number of model families specified in 'family'", .call=FALSE)
## If more than one family is specified, their names need to match those in
## submods.
if (length(family) == length(model$submods)) {
if (!all(names(family) %in% names(model$submods)))
stop("submodel identifiers in 'family' do not match those of the specified model", .call=FALSE)
}
## If one family specified, replicate it to match the length of the submods.
if (length(family) == 1) {
family <- rep(list(family[[1]]), length(model$submods))
names(family) <- names(model$submods)
}
## Iterate through family list, creating requested link functions.
Map(function(x) {bool.link(x$link)}, family)
}
get_rhs <- function(form) {
## Get the right hand side of a formula object.
as.formula(paste("~", as.character(form[3])))
}
get_lhs <- function(form) {
## Get the response variable from the formula.
as.character(form[2])
}
set_labels <- function(frame) {
## Set coefficient label names.
labels <- c()
for (m in names(frame)) {
lbl <- paste(colnames(frame[[m]]), m, sep = "_")
labels <- c(labels, lbl)
}
labels
}
build_modframe <- function(formulas) {
## Build a single model specification based on the formula inputs.
forms <- as.character(as.list(formulas)[-1])
## Check that the first formula is a boolean-type formula. stop() if not.
if (!isTRUE(grep("[&|]", forms[[1]]) == 1))
stop("first formula does not appear to be a boolean specification", .call=FALSE)
## Check that there are at least three formula entries. stop() if not.
if (length(forms) < 3)
stop("insufficient number of formulas", .call=FALSE)
## Replace boolean operators with `+'. All parentheses to "". Then convert
## all formula strings to formula objects.
forms <- as.list(forms)
forms[[1]] <- gsub("[&|]", "+", gsub("[()]", "", forms[[1]]))
for (i in 1:length(forms)) {
f <- forms[[i]]
forms[[i]] <- as.formula(forms[[i]])
}
## Verify that all formulas have a response == 1; i.e., they have to be
## two-sided formulas. stop() if not.
for (i in 1:length(forms)) {
r <- attr(terms(forms[[i]]), "response")
if (r != 1)
stop("all formulas must specify a single response variable", .call=FALSE)
}
## Check that all components specified in the boolean model exist as
## responses in the other submodels. stop() if not.
v <- rownames(attr(terms(forms[[1]]), "factors"))[-1]
for (i in 2:length(forms)) {
r <- rownames(attr(terms(forms[[i]]), "factors"))[1]
if (!(r %in% v))
stop("component of the boolean formula not specified", .call=FALSE)
}
## Merge the models into a single simple specification. Variables in the
## boolean model are *not* retained (they are latent and so don't actually
## exist in the data).
y <- rownames(attr(terms(forms[[1]]), "factors"))[1]
model <- c()
for (i in 2:length(forms)) {
r <- rownames(attr(terms(forms[[i]]), "factors"))[-1]
model <- c(model, r)
}
model <- paste(y, "~", paste(model, collapse = " + "))
formula(model)
}
build_frame <- function(mod.call) {
## Builds a model.frame from all components included in the call.
formulas <- mod.call[names(mod.call) %in% ""]
formula <- build_modframe(formulas)
## Get the components needed from the model call.
m <- match(c("data", "subset", "weights", "na.action", "offset"),
names(mod.call), 0)
## Build the overall model frame. (This bit of code was adapted from lme4.)
## This model frame will then be used to construct the submodel frames. This
## first full matrix is constructed to assure that there aren't any
## mis-matches in obs due to missing data, subsetting, etc.
frame <- list()
full.matrix <- mod.call[c(1, m)] # get the data.frame name.
full.matrix$formula <- formula # add the full formula obj.
full.matrix$drop.unused.levels <- TRUE # says to drop factor levels
full.matrix[[1]] <- as.name("model.frame") # renames function to model.frame
full.matrix <- eval(full.matrix)
## Loop through the sub-formulas, constructing model frames as specified by
## the user. This guarantees that intercepts can be dropped, for example.
for (m in 3:length(formulas)) {
mname <- get_lhs(formulas[[m]])
rhs <- get_rhs(formulas[[m]])
frame[[mname]] <- model.matrix(rhs, data = full.matrix)
}
list(response = model.response(full.matrix), frame = frame)
}
build_model <- function(mod.call) {
## Builds the model, in boolean form, to be estimated.
## Set up the container for the models.
model <- list(submods=list())
## Get the formulas.
formulas <- mod.call[names(mod.call) %in% ""]
forms <- as.list(as.character(as.list(formulas)[-1]))
## Set the boolean model, adding the boolean operators and parsing it to
## prepare for evaluation.
model$boolmod <- gsub("[&]", "%&%", gsub("[|]", "%|%", forms[[1]]))
model$boolmod <- parse(text = as.character(formula(model$boolmod)[3]))
## Set the submodels.
for (i in 2:length(forms)) {
f <- as.formula(forms[[i]])
n <- rownames(attr(terms(f), "factors"))[1]
model$submods[[n]] <- attr(terms(f), "term.labels")
}
model
}
set_coef <- function(frame) {
## Returns a list of index vectors for the coefficient vector for each of
## the submodels specified in the frame object.
coef.idx <- list()
## Get the total number of coefficients.
b <- rep(0, sum(sapply(frame, ncol)))
i <- 1
for (f in names(frame)) {
k <- ncol(frame[[f]])
##! k <- nrow(frame[[f]])
c <- b
c[i:(i+k-1)] <- 1
coef.idx[[f]] <- as.logical(c)
i <- i+k
}
coef.idx
}
scobit_setup <- function(obj) {
## Set up scobit lambda (shape) parameter(s) if necessary. coef.idx,
## coef.labels, and k need updated. Returns a modified obj.
l <- (mapply(function(x) x$name, obj$link) == "scobit") + 0
names(l) <- paste("lambda", names(l), sep="_")
obj$scobit.lambda <- l[l==1]
obj$k <- obj$k + sum(obj$scobit.lambda) # Update k
obj$coef.idx <- Map(function(x) c(x, rep(FALSE, length(obj$scobit.lambda))),
obj$coef.idx)
obj$coef.labels <- append(obj$coef.labels, names(obj$scobit.lambda))
obj
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.