R/model.R
In ipeglim: Imprecise Inferential Framework on Statistical Reasoning

Documented in model

#' @title 
#' Describing Sampling Model for Imprecise Inferential Framework
#'
#' @description 
#' The function \code{model} is used to describe sampling models, specified by 
#' a symbolic description of the linear predictor in \code{\link{formula}}.
#'
#' @param formula 
#' an object of class \code{\link{formula}}; 
#' A symbolic description of the model to be fitted;
#' The details of model specification are given under \sQuote{Details}.
#'
#' @param data 
#' an optional data frame, list or environment containing variables used for 
#' a description of the model in \code{\link{formula}}.
#' If not found in \code{data}, variables are taken from
#' \code{environment(formula)}, where the \code{model} is called.
#' 
#' @param dist 
#' a sampling distribution of a response variable;
#' Defaults to \code{poisson}.
#' See \sQuote{Details}.
#'
#' @param ztrunc 
#' a logical value; Is the zero truncated?; Defaults to \code{FALSE}.
#'
#' @param verbose
#' a logical value; Be more verbose about the process by displaying messages;
#' Defaults to \code{FALSE}.
#'
#' @details
#' The function \code{model} is the first task on performing an imprecise
#' inferential framework.
#' To tag this stage, \code{stage} has the character string \code{model}
#' that is the name of environment called.
#' 
#' All details are identical to those of \code{\link{formula}} in
#' \code{\link{glm}}.
#' In the context of regression, 
#' \code{y~0} represents a model that does not involve any variables in 
#' \code{data}. 
#' \code{y~1} represents a model that involve an intercept term, but no other
#' variables in \code{data}.
#' \code{y~.} represents a model that involve all variables in \code{data}.
#'
#' @return 
#' An object of class \code{imprecise} with the components:
#' \item{xreg}{the logical value, Is the model described in \code{formula} 
#' regression?}
#' \item{ztrunc}{the logical value supplied.}
#' \item{formula}{the formula supplied.}
#' \item{y}{the model reponse used in \code{formula}.}
#' \item{X}{the model matrix used in \code{formula}.}
#' \item{fit}{the object of classes \code{ztpreg} or \code{glm} depending on
#' the value of \code{ztrunc}; the model is fitted by a maximum likelihood 
#' method.}
#' \item{stage}{the environment name called.}
#' \item{init}{the list of estrimates for parameters in \code{model} and 
#' and its variance-covariance matrix.}
#'
#' @seealso 
#' \code{\link{glm}}, \code{\link{formula}}, \code{\link{ztpreg}}
#' 
#' @examples
#' \dontrun{
#' # not provided yet
#' }
#' 
#' @note
#' The current version provides the only sampling model \code{poisson}.
#' In the near future, \code{binomial} and \code{normal} will be added.
#'
#' @references
#' Lee (2013) ``Imprecise inferential framework'', PhD thesis.
#'
#' @keywords 
#' zero-truncation, models, regression, imprecise Poisson
#' 
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' @export
model <- function(formula, data, dist=c("poisson", "binom", "geom"), 
                  ztrunc=FALSE, verbose=FALSE){

  # sanity check
  dist <- match.arg(dist)
  mf <- match.call(expand.dots=FALSE)
  stage <- mf[[1]]
  
  stopifnot(!missing(formula))
  stopifnot(!missing(ztrunc), is.logical(ztrunc), length(ztrunc)==1)
  if (missing(data)) {
    data <- environment(formula)
  }
  
  if (verbose) {
    message(mf[[1]], ":\nInput sanity check .................... PASS!")
  }
  
  # call, formula, and model frame
  m <- match(c("formula", "data"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())

  # extract terms, model matrix, and response
  mt <- terms(x=formula, data=data)
  X <- model.matrix(object=mt, data=mf)
  y <- model.response(mf, "numeric")
  # print(y)
  
  # variable naming convention
  n <- length(y)
  p <- ncol(X)
  xreg <- (p > 0)
  
  # search for initial values about best guess on optimization
  if (xreg) {
    if (ztrunc) {
      if(verbose) message("regression with ztrunc=TRUE")
      fit <- ztpreg(formula=formula, data=data, dist="poisson", ztrunc=ztrunc)
      init <- list(cfs=as.vector(fit$cfs), vcv=as.matrix(fit$vcv))
      ## it doesn't handle a single observation because of no covariance
    } else {
      if(verbose) message("regression with ztrunc=FALSE")
      fit <- glm(formula=formula, data=data, family=poisson(link="log"))
      init <- list(cfs=coefficients(fit), vcv=vcov(fit))
      ## it doesn't handle a single observation because of no covariance.
      ## in this case, the process should be stopped.
    } 
  } else {
		# non-regression model
		# ff <- terms(formula)
		# formula <- as.formula(paste(ff[[2]],"~1", sep=""))
		
		## TODO:
		## The handler is required when formula[[3]] == 0 (i.e., y~0 is entered)
  
    if (ztrunc) {
      if(verbose) message("non-regression with ztrunc=TRUE")
      # fit <- ztpreg(formula=y~1, data=data, dist="poisson", ztrunc=ztrunc)
      fit <- ztpreg(formula=formula, data=data, dist="poisson", ztrunc=ztrunc)
      init <- exp(fit$cfs)
    } else {
      if(verbose) message("non-regression with ztrunc=FALSE")
      ## NOTE: glm() searches for variables in the global environment not local environment
      # fit <- glm(formula=y~1, data=data, family=poisson(link="log"))
      fit <- glm(formula=formula, data=data, family=poisson(link="log"))
      init <- coefficients(fit)
      
      ## Do we need to use glm() here?  it should be simple for finding the inital values for mean.
      
      
    }
    
  }
  
  rvals <- list(call=call, formula=formula, data=data, y=y, X=X, xreg=xreg, 
                dist=dist, ztrunc=ztrunc, stage=stage, fit=fit, init=init)
  class(rvals) <- "imprecise"
  invisible(rvals)
}
NULL