# R/zvgam.R In lllcrc: Local Log-linear Models for Capture-Recapture

#### Documented in AICc.vgamconstruct.vgamvgam.crc

#' Compute the AICc for a VGAM model
#'
#' The AICc, or Akaike Information Criterion with small sample correction, is
#' computed for a VGAM model.
#'
#' See references.  Burnham and Anderson (2002).  This correction to the AIC is
#' derived under some assumption of normality, so it's validity may depend on
#' there being large counts for each capture pattern so that the multivariate
#' normal approximation to the multinomial is good.
#'
#' The AIC methods for VGAM purport to have the AICc option for the
#' \code{vglm}, but this did not seem to exist for the \code{vgam} as of
#' October 2013.
#'
#' @param mod A VGAM model object
#' @return \item{AICc}{The value of the AICc}
#' @author Zach Kurtz
#' @references
#' \url{http://stats.stackexchange.com/questions/69723/two-different-formulas-for-aicc}
#' @export AICc.vgam
AICc.vgam = function(mod){
uncorrected = AIC(mod)
n = nrow(mod@fitted.values)
npar = length(mod@coefficients) + sum(mod@nl.df)
correction = 2*npar*(npar+1)/(npar + n - 1)
out = uncorrected + correction
return(out)}

#' Make a VGAM model
#'
#' A wrapper function to fit a CRC model using the vgam command in the VGAM
#' package.
#'
#' Even as a wrapper function, this still primarily a workhorse function for
#' \code{vgam.crc}.
#'
#' @param sdf Nonlinear degrees of freedom.  See VGAM documentation for more on
#' what this means.
#' @param constr.cols A vector of column indices.
#' @param constraints A transformed log-linear design matrix.  The submatrix
#' indicated by \code{constr.cols} is the desired argument to the VGAM command
#' to impose the corresponding log-linear model on the data.
#' @param dat A CRC dataset in standard form (i.e., as output of
#' \code{format.data})
#' @return A \code{vgam} model object
#' @author Zach Kurtz
#' @references See references for \code{\link{vgam.crc}}.
#' @export construct.vgam
#' @import VGAM
construct.vgam = function(sdf, constr.cols = NULL, constraints, dat){
#constr.cols = c(models[[i]]); constraints = cons.mat
#sdf is a vector of smoothing spline dfs for the respective continuous predictors
cons.sub = constraints[,constr.cols]
if(ncol(cons.sub) == ncol(constraints)) cons.sub = diag(ncol(constraints)) # default, most complex model
# Variable names
x.covs = colnames(dat)[substr(colnames(dat),1,1) == "x"]
y.bits = colnames(dat)[substr(colnames(dat),1,1) == "y"]
# Construct vgam formula
n.covs = length(x.covs)
s.covs = rep(NA, n.covs)
v=1
for(i in 1:length(x.covs)){
if(substr(x.covs[i],3,5) == "con"){
if(v>length(sdf)) stop("Length of sdf does not match the number of continuous variables")
s.covs[i] = paste("s(", x.covs[i], ", df = ", sdf[v],")", sep = "")
v=v+1
}else{	s.covs[i] = x.covs[i]}
}
cons = vector(mode = 'list', length = n.covs+1)
for(i in 1:length(cons)) cons[[i]] = cons.sub
names(cons)[1] = '(Intercept)'; for(i in 1:length(x.covs)) names(cons)[1+i] = s.covs[i]
x.part = paste(s.covs, collapse = "+")
y.part = paste(y.bits, collapse = ",")
vgam.form = paste("vgam(cbind(", y.part, ") ~ ", x.part,
", data = dat, multinomial, constraints = cons)", sep = "")
mod = eval(parse(text = vgam.form))
return(mod)
}

#' Build a VGAM CRC model
#'
#' A high-level function to fit a VGAM CRC model to standardized data (the
#' output of format.data).
#'
#' Implements, approximately, the method of Zwane (2004).  Serves mainly as a
#' user-friendly interface to the VGAM package.
#'
#' @param dat The CRC data, as output of \code{format.data}
#' @param models A list of models -- or an expression that returns a
#' list of models -- to be considered in local model search.  Run the default,
#' \code{\link{make.hierarchical.term.sets}(k = attributes(dat)$k)}, to see an example. #' @param sdf A vector, with length corresponding to the number of continuous #' predictor variables, that states the desired effective degrees of freedom #' for the corresponding smooth spline in VGAM. #' @param llform A character vector of predictors of the form "c1", "c2" for #' main effects, or "c12" for an interaction. By default, the function #' \code{AICc.vgam} is used to select the best set of terms among the candidate #' sets that are proposed by the function \code{\link{make.hierarchical.term.sets}}. #' @param round.vars See \code{\link{micro.post.stratify}}, which is called #' within \code{vgam.crc}. #' @param rounding.scale See \code{\link{micro.post.stratify}}, which is called #' within \code{vgam.crc}. #' @param boot.control A list of control parameters for bootstrapping the #' sampling distribution of the estimator(s). By default, there is no #' bootstrapping. #' @return \item{est}{A point estimate of the population size} #' \item{llform}{The set of log-linear terms} \item{dat}{The output of function #' \code{micro.post.stratify}, with estimated local rates of missingness #' appended as an extra column labeled \code{pi0}. In addition, \code{mct} #' (multinomial cell count) gives the number of observed units with that #' distinct covariate vector, and \code{cpi0} (cumulative number missing) gives #' the the product of \code{pi0} with \code{mct}, such that summing over this #' vectorized product is exactly the Horvitz-Thompson style sum in capture #' recapture. } \item{aic}{The AICc for the chosen VGAM, as computed by #' function \code{AICc.vgam}} \item{mod}{The VGAM model object; see the #' \code{vgam} function in package \code{VGAM} for details} \item{...}{The #' output is of class \code{vgam.crc} and has attributes \code{cont.x} and #' \code{conteg.x}, which relate the continuous and categorical variables in #' the model } #' @author Zach Kurtz #' @references #' Zwane E and Heijden Pvd (2003). "Implementing the parametric bootstrap #' in capture-recapture models with continuous covariates." \emph{Statistics & #' Probability Letters}, \bold{65}, pp. 121-125. #' @references #' Zwane E and Heijden Pvd (2004). "Semiparametric models for #' capture-recapture studies with covariates." \emph{Computational Statistics & #' Data Analysis}, \bold{47}, pp. 729-743. #' @export vgam.crc vgam.crc = function(dat, models = make.hierarchical.term.sets(k = attributes(dt)$k),
sdf, llform = NULL, round.vars = NULL, rounding.scale = NULL, boot.control = NULL)
{
#dat = dt; sdf = 4; rounding.scale = 5; llform = NULL; boot.control = list(n.reps= 5, seed = 4); llform = c("c1","c2","c3","c23")
# dat must be in the form of output of format.data()
con.cov = colnames(dat)[substr(colnames(dat),1,5) == "x.con"]
dis.cov = colnames(dat)[substr(colnames(dat),1,5) == "x.dis"]
y.bits  = colnames(dat)[substr(colnames(dat),1,1) == "y"    ]
# Consider rounding the continuous covariates to reduce the covariate space
if(length(round.vars) != length(rounding.scale)) stop("round.vars needs to have the same number of entries as rounding.scale")
if(is.null(round.vars)) {cdt = dat; cdt$mct = rep(1, nrow(cdt)) }else{cdt = micro.post.stratify(dat, round.vars, rounding.scale)} #Construct the full constraints matrix: k = nchar(y.bits[1])-1 des = make.design.matrix(k=k) #Switching to a logistic paradigm with "11...1" as the reference outcome, # here is the matrix of so called "constraints" in VGAM: cons.mat = 1-as.matrix(des[1:(2^k-2),]) if(is.null(llform)){ if(is.null(models)) stop("you need to choose a non NULL value for either llform or models") n.mod = length(models) option.scores = rep(NA, n.mod) for(i in 1:n.mod){ cat(paste("Computing the AICc for", i, "of", n.mod, "models \n")) llform = models[[i]] #loglinear terms mod = construct.vgam(sdf = sdf, constr.cols = c(models[[i]]), constraints = cons.mat, dat = cdt) option.scores[i] = AICc.vgam(mod) } llform = models[[which.min(option.scores)]] } #Finally, build model: mod = construct.vgam(sdf, constr.cols = llform, constraints = cons.mat, dat = cdt) fits = fitted(mod) cdt[,y.bits] = fits colnames(fits) = substring(colnames(fits),2) cdt$pi0 = saturated.local(fits)
cdt$cpi0 = cdt$pi0*cdt$mct hpi = attributes(mod)$fitted.values
colnames(hpi) = substring(colnames(hpi),2)
out = list(est = sum(cdt$cpi0), llform = llform, dat = cdt, aic = AICc.vgam(mod), mod = mod, hpi = hpi) # Bootstrap, if requested: if(!is.null(boot.control)){ if(!is.null(boot.control$seed)) set.seed(boot.control$seed) n.reps = boot.control$n.reps
b.est = rep(NA, n.reps)
b.cpi0 = matrix(NA, ncol = n.reps, nrow = nrow(cdt))
boot.list = list(dat = cdt, dens=fits, sdf = sdf, cpi0 = cdt$cpi0, models = models, cons.mat = cons.mat) for(i in 1:n.reps){ print(paste("vgam.crc is working on bootstrap iteration", i)) bb = vgam.crc.boots(boot.list) b.est[i] = sum(bb$cpi0, na.rm = TRUE)
b.cpi0[,i] = bb$cpi0 } out$boots = list(est = b.est, loc.est = b.cpi0, n.reps = n.reps)
}
class(out) = "vgam.crc"
attributes(out)$cont.x = names(out$dat)[substr(names(out$dat), 1,5) == "x.con"] attributes(out)$categ.x = names(out$dat)[substr(names(out$dat), 1,5) == "x.dis"]
return(out)}


## Try the lllcrc package in your browser

Any scripts or data that you put into this service are public.

lllcrc documentation built on May 30, 2017, 7:10 a.m.