R/bcgam.R
In bcgam: Bayesian Constrained Generalised Linear Models

Documented in bcgam

#' @title Fitting Bayesian Constrained Generalised Additive Models
#'
#' @description \code{bcgam} is used to fit generalised partial linear regression models using a Bayesian 
#' approach, where shape and smoothness constraints are imposed on nonparametrically modelled predictors 
#' through shape-restricted splines, and no constraints are imposed on optional parametrically modelled covariates.
#'
#' @param formula an object of class \code{\link{formula}} that contains a symbolic description of the model to be fitted.
#' It has the form "response~nonparam+param", where "nonparam" are the predictors to be modelled
#' nonparametrically and "param" are the optional predictors to be modelled parametrically. The user
#' has to specify the relationship between the systematic component \eqn{\eta} and any nonparametrically modelled
#' predictor \eqn{x}. The options are:
#' \itemize{
#' \item sm.incr(x): \eqn{x} is smooth and increasing in \eqn{\eta}. See \code{\link{sm.incr}} for more details.
#' \item sm.decr(x): \eqn{x} is smooth and decreasing in \eqn{\eta}. See \code{\link{sm.decr}} for more details.
#' \item sm.conv(x): \eqn{x} is smooth and convex in \eqn{\eta}. See \code{\link{sm.conv}} for more details.
#' \item sm.conc(x): \eqn{x} is smooth and concave in \eqn{\eta}. See \code{\link{sm.conc}} for more details.
#' \item sm.incr.conv(x): \eqn{x} is smooth, increasing and convex in \eqn{\eta}. See \code{\link{sm.incr.conv}} for more details.
#' \item sm.decr.conv(x): \eqn{x} is smooth, decreasing and convex in \eqn{\eta}. See \code{\link{sm.decr.conv}} for more details.
#' \item sm.incr.conc(x): \eqn{x} is smooth, increasing and concave in \eqn{\eta}. See \code{\link{sm.incr.conc}} for more details.
#' \item sm.decr.conc(x): \eqn{x} is smooth, decreasing and concave in \eqn{\eta}. See \code{\link{sm.decr.conc}} for more details.
#' } 
#' @param family a description of the error distribution and link function to be used in the model. This accepts
#' only the following families: \code{"gaussian"} (normal errors model), \code{"binomial"} (logistic model), and 
#' \code{"poisson"} (Poisson model). See \code{\link{family}} for details of family functions. 
#' @param data an optional data frame, list or environment containing the variables in the model. The default is \code{"NULL"}.
#' @param nloop length of the MCMC. The default is \code{10000}.
#' @param burnin a positive value, smaller than \code{nloop}, that indicates the amount of initial
#' MCMC values to be discarded. By default, it burns-in the first 10\% chain values.
#'
#' @export 
#'
#' @import nimble coda igraph
#'
#' @importFrom grDevices col2rgb rgb
#' @importFrom stats .checkMFClasses .getXlevels contrasts delete.response
#' gaussian model.frame model.matrix model.response na.pass predict
#' quantile rbinom rnorm rpois sd terms
#'
#' @details We assume the additive model for each systematic component element \eqn{\eta_i} given by
#' \deqn{\eta_i = f_1(x_{1i}) + ... + f_L(x_{Li}) + z_i'\gamma,} where \eqn{z_i} is a vector
#' of variables to be modelled parametrically and \eqn{\gamma} is a parameter vector. The functions
#' \eqn{f_l} of the continuous predictors \eqn{x_l} are assumed to be smooth, and shape restrictions
#' such as monotonicity and/or convexity might be assumed. Generally, the vector \eqn{\eta=(\eta_1, ..., \eta_n)'}
#' is approximated by \deqn{\sum_{j=1}^{m_1}\beta_{1j}\delta_{1j} + ... + \sum_{j=1}^{m_L}\beta_{Lj}\delta_{Lj} 
#' + \sum_{j=1}^{p}\alpha_j\nu_j,} where \eqn{\beta_{lj} \ge 0} for all \eqn{l,j}. The \eqn{\delta}'s represent
#' the basis vectors used to approximate the \eqn{f} functions. The \eqn{\nu_j} consists of the
#' one vector and the vectors of the observed values of covariates to be modelled parametrically. In addition, 
#' when \eqn{f_l} is assumed to be convex, the \eqn{x_l} vector is included as one of the \eqn{\nu_j}.
#'
#' A Bayesian approach is considered for estimation and inference of the model above. As the \eqn{\beta}
#' coefficients are constrained to be non-negative, then a gamma prior with hyperparameters \eqn{c_{l1}} (shape)
#' and \eqn{c_{l2}} (scale) is assumed for each \eqn{\beta_{lj}}. The values \eqn{c_{l1}} and \eqn{c_{l2}} are
#' chosen in a way that a large variance can be combined with a small mean, so that it is close to a 
#' non-informative gamma prior. Further, a normal prior distribution with mean zero and large variance \eqn{M}
#' is considered for the \eqn{\alpha} coefficients.
#'
#' \code{bcgam} makes use of the system "nimble" to set the Bayesian (hierarchical) model and compute the MCMC. Hence, 
#' "nimble" has to be loaded in \code{R} to be able to use \code{bcgam}. Information about how to download
#' and install "nimble" can be found at \url{https://r-nimble.org}.
#'
#' @return \code{bcgam} returns an object of class "bcgam".
#' 
#' The generic routines \code{summary} and \code{print} are used to obtain and print a summary
#' of the results. Further, 2D and 3D plots can be created using \code{plot} and \code{persp}, respectively.
#'
#' An object of class "bcgam" is a list containing at least the following components:
#' \item{coefs}{a vector of posterior means of the \eqn{\alpha} and \eqn{\beta} coefficients.} 
#' \item{sd.coefs}{a vector of posterior standard errors of the \eqn{\alpha} and \eqn{\beta} coefficients.} 
#' \item{etahat}{a vector of posterior means of the systematic component \eqn{\eta}.}
#' \item{muhat}{a vector of posterior means of \eqn{\mu}. \eqn{\mu} is obtained by 
#'  transforming \eqn{\eta} using the inverse of the link function.}
#' \item{alpha.sims}{a matrix of posterior samples (after burn-in) of the \eqn{\alpha} coefficients.}
#' \item{beta.sims}{a matrix of posterior samples (after burn-in) of the \eqn{\beta} coefficients.}
#' \item{sigma.sims}{a matrix of posterior samples (after burn-in) of \eqn{\sigma}. This is only
#'  shown when \code{family="gaussian"}. }
#' \item{eta.sims}{a matrix of posterior samples (after burn-in) of the systematic component \eqn{\eta}.}
#' \item{mu.sims}{a matrix of posterior samples (after burn-in) of \eqn{\mu}.
#' \eqn{\mu} is obtained by transforming \eqn{\eta} using the inverse of the link function.}
#' \item{delta}{a matrix that contains the basis functions \eqn{\delta} in its columns. }
#' \item{zmat}{a matrix that contains the vectors \eqn{\nu} in its columns. }
#' \item{knots}{a list of the knots.}
#' \item{shapes}{a list of numbers that indicate the shape categories.}
#' \item{sps}{a character vector of the space parameter used to create the knots.}
#' \item{nloop}{the length of the MCMC.}
#' \item{burnin}{the burn-in value.}
#' \item{family}{the family parameter.}
#' \item{y}{the response variable.}
#'
#' @seealso \code{\link{predict.bcgam}}
#'
#' @author Cristian Oliva-Aviles and Mary C. Meyer
#'
#' @references Meyer, M. C. (2008) Inference using shape-restricted regression splines. 
#' \emph{Annals of Applied Statistics} \strong{2(3)}, 1013-1033.
#'
#' Meyer, M. C., Hackstadt, A. J., and Hoeting J. A. (2011) Bayesian estimation and inference for 
#' generalised partial linear models using shape-restricted splines. \emph{Journal of Nonparametric
#' Statistics} \strong{23(4)}, 867-884. 
#'
#' @examples
#' \dontrun{
#' ## Example 1 (gaussian)
#' data(duncan)
#'
#' bcgam.fit <- bcgam(income~sm.incr(prestige, space="E")+sm.conv(education)+type, data=duncan)
#' print(bcgam.fit)
#' summary(bcgam.fit)
#' plot(bcgam.fit, prestige, col=4)
#' persp(bcgam.fit, prestige, education, level=0.90)
#'
#'
#' ## Example 2 (poisson)
#' set.seed(2018)
#' n<-50
#' x1<-sqrt(1:n)
#' z<-as.factor(rbinom(n, 1, 0.5))
#' log.eta<-x1/7+0.2*as.numeric(z)+rnorm(50, sd=0.6)
#' eta<-exp(log.eta)
#' y<-rpois(n,eta)
#'
#' bcgam.fit <- bcgam(y~sm.conv(x1)+z, family="poisson")
#' summary(bcgam.fit)
#' predict(bcgam.fit, newdata=data.frame(x1=0.2, z="0"), interval="credible")
#' plot(bcgam.fit, x1, col=3, col.inter=4)
#' }
bcgam <- function(formula, family=gaussian(), data=NULL, nloop=10000, burnin=trunc(nloop/10))
{
cl<-match.call()
 if (is.character(family)) 
        family <- get(family, mode = "function", envir = parent.frame())
    if (is.function(family)) 
        family <- family()
    if (is.null(family$family)) 
        stop("'family' not recognized!")
mf<-match.call(expand.dots=FALSE)    
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
ynm <- names(mf)[1]
mt <- attr(mf, "terms")
y <- model.response(mf, "any")
n <- length(y)
 if (family$family == "binomial") {

        if (class(y) == "factor") {
            y = ifelse(y == levels(y)[1], 0, 1)
        }
    }
    
	shapes1 <- NULL
	xmat <- NULL
	xmatnms <- NULL
	nums <- NULL
    	ks <- list()
    	sps <- NULL
    	xid <- 1

    for (i in 2:ncol(mf)) {
        if (is.numeric(attributes(mf[, i])$shape)) {
            shapes1 <- c(shapes1, attributes(mf[, i])$shape)
            xmat <- cbind(xmat, mf[, i])
         	xmatnms <- c(xmatnms, attributes(mf[, i])$nm)
            nums <- c(nums, attributes(mf[, i])$numknots)
            sps <- c(sps, attributes(mf[, i])$space)
            ks[[xid]] <- attributes(mf[, i])$knots
            xid <- xid + 1
        }
	}

	if(is.null(xmatnms)){stop("No variables to be modeled nonparametrically!")}

    	znms <- NULL  
	zid <- NULL  
	is_fac<-NULL 
	vals<-NULL 
	dist<-0
	zid1<-NULL
	zid2<-NULL
	zmat<-NULL

	ind.nonpara<-NULL  
	zmat<-NULL
	for (i in 2:ncol(mf)) {
      	if (is.numeric(attributes(mf[, i])$shape)) {
			ind.nonpara<-c(ind.nonpara,i)
		}
	}
	
	ind.nonparaplusinter <- c(1,ind.nonpara)
	zmat.matrix<-model.matrix(mt,mf, contrasts)
	zmat<-as.matrix(zmat.matrix[,-ind.nonparaplusinter])
	znms<-colnames(zmat.matrix)[-ind.nonparaplusinter]

####

getdefault.values<-function(x.var){
	ans<-NULL
	if(is.factor(x.var)){ans <- x.var[which.max(table(x.var))]}
	else{
	sort.x <- sort(x.var)
	pos.value <- ceiling(length(sort.x)/2)
	ans <- sort.x[pos.value]}
	ans	
	}
	
default.plotvalues<- data.frame(lapply(mf[,-1],getdefault.values))
colnames(default.plotvalues)[1:length(xmatnms)] <- xmatnms

####
	if (family$family == "binomial" | family$family == "poisson") {
        wt.iter = TRUE
    }
    else {
        wt.iter = FALSE
    }

	if (is.null(shapes1)) {
        nloop <- 0
    }

    xmat0 <- xmat
    shapes0 <- shapes1
    nums0 <- nums  #number of knots
    ks0 <- ks     #knots
    sps0 <- sps
    xmatnms0 <- xmatnms
    nloop <- nloop
    burnin <- burnin

ans <- bcgam.fit(y, xmat0, shapes0, zmat, nums0, ks0, sps0, 
nloop,burnin, family, xmatnms0, znms )

class(ans) <- "bcgam"
	ans$xmat <- xmat0
	ans$xmatnms <- xmatnms0
	ans$nloop <- nloop
	ans$burnin <- burnin
	ans$shapes <- shapes0
	ans$y <- y
	ans$default.plotvalues=default.plotvalues
	ans$data <- mf
	ans$family <- family$family
	ans$ind_nonparam <- ind.nonpara
	ans$ks <- ks0
	ans$sps <- sps0

    	ans$levels <- .getXlevels(mt, mf)
    	ans$call <- cl
    	ans$terms <- mt
ans
}