#' Boltzmann Bayes Learning Inference
#'
#' Main driver for bbl inference
#'
#' Formula argument and data are used to tabulate xlevels unless explicitly
#' given as list. Data are expected to be factors or integers. This function
#' is a driver interepreting formula and calls \code{bbi.fit}. Will stop with
#' error if any predictor has only one level unless \code{novarOk='TRUE'}.
#' Use \code{\link{removeConst}} to remove the non-varying predictors before
#' calling if this happens.
#'
#' @param formula Formula for modeling
#' @param data Data for fitting. If missing, must be explicitly included in
#' \code{formula}.
#' @param freq Vector of non-negative integer frequencies, recoding
#' the number of times each row of data must be repeated. If \code{NULL},
#' assumed to be all 1. Fractional weights are not supported.
#' @param xlevels List of factor levels for predictors. If \code{NULL},
#' will be inferred from data with factor levels ordered alphanumerically.
#' @param verbose Output verbosity level. Will be send to down-stream function
#' calls with one level lower.
#' @param method BB inference algorithm; pseudo-likelihood inference (\code{'pseudo'})
#' or mean field (\code{'mf'}).
#' @param novarOk If \code{TRUE}, will proceed with predictors having only one
#' level.
#' @param testNull Repeat the inference for the `pooled' sample; i.e., under the
#' null hypothesis of all rows in data belonging to a single group.
#' @param ... Other parameters to \code{\link{mlestimate}}.
#' @return
#' A list of class \code{bbl} with the following elements:
#' \item{coefficients}{List of inferred coefficients with elements
#' \code{h}, \code{J}, \code{h0}, and \code{J0}. The bias
#' parameter \code{h} is a list of length equal to no. of
#' response groups, each of which is a list of the same struture as
#' \code{xlevels}: length equal to no. of predictors, containing vectors of
#' length equal to each predictor factor levels:
#' \eqn{h_i^{(y)}(x)} represented by \code{h[[y]][[i]][x]}.
#' The interaction parameter \code{J} is a list of lists of dimension
#' \eqn{m \times m}, where \eqn{m} is the number of predictors. Each
#' element is a matrix of dimension \eqn{L_i \times L_j}, where \eqn{L_i}
#' and \eqn{L_j} are numbers of factor levels in predictor \code{i} and
#' \code{j}: \eqn{J_{ij}^{(y)}(x_1,x_2)} represented by
#' \code{J[[y]][[i]][[j]][x1,x2]}. All elements of lists are named.
#' The pooled parameters \code{h0} and \code{J0}, if computed,
#' are of one less dimension, omitting response group argument.}
#' \item{xlevels}{List of vectors containing predictor levels.}
#' \item{terms}{The \code{terms} of \code{formula} input.}
#' \item{groups}{Vector of response groups.}
#' \item{groupname}{Name of the response variable.}
#' \item{qJ}{Matrix of logicals whose elements record whether
#' \code{formula} includes interaction between the two predictors.}
#' \item{model}{Model data frame derived from \code{formula} and \code{data}.}
#' \item{lkh}{Log likelihood.}
#' \item{lz}{Vector log partition function. Used in \code{\link{predict}}.}
#' \item{freq}{Frequency vector input.}
#' \item{call}{Function call.}
#' \item{df}{Degrees of freedom.}
#' @examples
#' titanic <- as.data.frame(Titanic)
#' freq <- titanic$Freq
#' titanic <- titanic[,1:4]
#' b <- bbl(Survived ~ .^2, data=titanic, freq=freq)
#' b
#' @import Rcpp
#' @import stats
#' @import graphics
#' @importFrom grDevices colorRampPalette gray.colors
#' @useDynLib bbl
#' @export
bbl <- function(formula, data, freq=NULL, xlevels=NULL, verbose=1,
method='pseudo', novarOk=FALSE, testNull=TRUE, ...){
cl <- match.call()
if(missing(data)) data <- environment(formula)
if(!is.null(freq)){
if(length(freq)!=NROW(data))
stop('Length of freq does not match data')
zero <- freq==0
data <- data[!zero,]
freq <- freq[!zero] # remove rows with zero freq
}
term <- stats::terms(formula, data=data)
idy <- attributes(term)$response
vars <- as.character(attributes(term)$variables)
resp <- vars[[idy+1]]
vars <- vars[!(vars %in% c('list',resp))]
# xlevels <- .getXlevels(term, m=data)
if(is.null(xlevels))
xlevels <- getxlevels(vars, data=data)
formula <- formula(term)
if(!novarOk){
for(i in seq_along(xlevels)){
if(length(xlevels[[i]])==1)
stop(paste0('Predictor ',names(xlevels)[i],' has one level'))
}
}
m <- length(xlevels)
idy <- attributes(term)$response
resp <- as.character(attributes(term)$variables[[idy+1]])
y <- data[,resp]
x <- data[,names(xlevels)]
Ly <- length(levels(factor(y)))
if(Ly==1) warning('Only one response group in data')
label <- attr(term,'term.labels')
ilabel <- label[vapply(label,FUN=function(x){grepl(':',x)}, logical(1))]
ilabel <- gsub('`','',ilabel)
ijlabel <- strsplit(ilabel,split=':')
qJ <- matrix(FALSE, nrow=m, ncol=m)
rownames(qJ) <- colnames(qJ) <- names(xlevels)
for(k in seq_along(ijlabel))
qJ[ijlabel[[k]][1],ijlabel[[k]][2]] <- TRUE
qJ <- qJ | t(qJ) # TRUE for all interacting pairs of predictors
naive <- sum(qJ)==0
b <- bbl.fit(x=x, y=y, qJ=qJ, freq=freq, xlevels=xlevels,
verbose=verbose-1, method=method, ...) # alternative
if(testNull)
b0 <- bbl.fit(x=x, y=rep('pooled',length(y)), qJ=qJ, freq=freq,
xlevels=xlevels, verbose=verbose-1, method=method, ...) # null
else b0 <- NULL
bb <- list()
class(bb) <- 'bbl'
bb$coefficients <- list(h=b$h, J=b$J, h0=b0$h[[1]], J0=b0$J[[1]])
bb$xlevels <- xlevels
bb$terms <- term
bb$groups <- levels(factor(y))
bb$groupname <- resp
bb$qJ <- qJ
bb$model <- data[,c(resp, names(xlevels))]
bb$lkh <- b$lkh
bb$lz <- b$lz
bb$freq <- freq
bb$call <- cl
if(naive) bb$method <- 'mf'
else bb$method <- method
df <- 0
for(i in seq_len(m)){
ni <- length(xlevels[[i]])-1
df <- df + ni
if(i==m) next()
for(j in seq(i+1,m))
if(qJ[i,j]) df <- df + ni*(length(xlevels[[j]])-1)
}
bb$df <- df
return(bb)
}
#' bbl Inference with model matrix
#'
#' Performs bbl inference using response vector and predictor matrix
#'
#' This function would normally be called by \code{\link{bbl}} rather than
#' directly. Expects the predictor data \code{x} and response vector \code{y}
#' instead of formula input to \code{\link{bbl}}.
#'
#' @param x Data frame of factors with each predictor in columns.
#' @param y Vector of response variables.
#' @param qJ Matrix of logicals indicating which predictor combinations
#' are interacting.
#' @param freq Vector of non-negative integer frequencies, recoding
#' the number of times each row of data must be repeated.
#' If \code{NULL}, assumed to be all 1. Fractional weights are not
#' supported.
#' @param xlevels List of factor levels for predictors. If \code{NULL},
#' will be inferred from data with factor levels ordered alphanumerically.
#' @param verbose Verbosity level of output. Will be propagated to
#' \code{\link{mlestimate}} with one level down.
#' @param method \code{c('pseudo','mf')}; inference method.
#' @param ... Other arguments to \code{\link{mlestimate}}.
#' @return List of named components \code{h}, \code{J}, \code{lkh}, and
#' \code{lz}; see \code{\link{bbl}} for information regarding these
#' components.
#' @examples
#' titanic <- as.data.frame(Titanic)
#' freq <- titanic$Freq
#' x <- titanic[,1:3]
#' y <- titanic$Survived
#' b <- bbl.fit(x=x,y=y, freq=freq)
#' b
#' @export
bbl.fit <- function(x, y, qJ=NULL, freq=NULL, xlevels=NULL, verbose=1,
method='pseudo', ...){
le <- levels(factor(y))
Ly <- length(le)
h <- J <- list()
m <- NCOL(x)
vars <- colnames(x)
if(is.null(xlevels))
xlevels <- getxlevels(colnames(x), data=x)
if(is.null(qJ)){
qJ <- matrix(TRUE, nrow=m, ncol=m)
diag(qJ) <- FALSE
rownames(qJ) <- colnames(qJ) <- vars
}
if(!all.equal(dim(qJ), c(m,m)))
stop('Incorrect dimension of qJ')
lkh <- 0
lz <- rep(0, Ly)
for(iy in seq_len(Ly)){
ny <- sum(y==le[iy])
if(ny==0)
stop(paste0('No instance of "',le[iy],'" in training data'))
da <- x[y==le[iy],]
if(!is.null(freq))
frq <- freq[y==le[iy]]
else frq <- rep(1, sum(y==le[iy]))
if(verbose > 1) cat(' Inference for y = ',le[iy],':\n',sep='')
# xda <- data.matrix(da) - 1
xda <- matrix(0, nrow=NROW(da), ncol=NCOL(da))
L <- NULL
for(i in seq_len(NCOL(da))){
xda[,i] <- match(da[,i],xlevels[[i]])-1
L[i] <- length(xlevels[[i]])
}
b <- mlestimate(xi=xda, freq=frq, qJ=qJ, verbose=verbose-1,
method=method, L=L, ...)
names(b$h) <- names(b$J) <- names(xlevels)
for(i in seq_len(m)){
ni <- xlevels[[i]][-1][seq_along(b$h[[i]])]
names(b$h[[i]]) <- ni
names(b$J[[i]]) <- names(xlevels)
for(j in seq_len(m)){
if(NROW(b$J[[i]][[j]])>0)
rownames(b$J[[i]][[j]]) <- ni[seq_len(NROW(b$J[[i]][[j]]))]
if(NCOL(b$J[[i]][[j]])>0)
colnames(b$J[[i]][[j]]) <-
xlevels[[j]][-1][seq_len(NCOL(b$J[[i]][[j]]))]
}
}
h[[iy]] <- b$h
J[[iy]] <- b$J
lkh <- lkh + b$lkh*ny
lz[iy] <- b$lz
}
names(lz) <- names(h) <- names(J) <- le
return(list(h=h, J=J, lkh=lkh, lz=lz))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.