R/a90logit.R
In ipeglim: Imprecise Inferential Framework on Statistical Reasoning

Documented in a90logit

#' @title Maximum Likelihood Parameter Estimation of Logistic Regression Model 
#' for Capture-Recapture Data
#'
#' @description Logistic regression model is used for estimating the unknown
#' population size using multiple data sources.
#' The model was introduced in the study of Alho (1990) and two data
#' sources with binary indication of a case are used.
#'
#' @param formula a symbolic description of the model to be fit,
#' @param data a data frame containing the variables in the model,
#' @param nlists a number of data sources,
#' @param tol distance-based absolute convergence tolerance.
#' Default to 1e-6.
#' @param max.iter the number of maximum iterations.  Default to 1e2
#' for newton-rasphon method.
#'
#' @return An object of class \code{a90logit} with components including
#' \item{formula}{formula used to be fitted,}
#' \item{converged}{integer code which indicates a successful
#' completion of optimization process,}
#' \item{niters}{integer that indicates a number of iterations until
#' convergence to estimates,}
#' \item{cfs}{estimated regression coefficients,}
#' \item{vcv}{estimated variance-covariance matrix of regression
#' coefficients which is obtained by the inverse of Hessian matrix}
#' \item{llk}{value of log-likelihood function at \code{cfs}.}
#' 
#' @keywords regression
#' @seealso see also
#' @section TODO:
#' Newton-Raphson method used in this algorithm can be replaced by
#' \code{optim()}.
#' 
#' @examples
#' ## Please see the vignette.
#' 
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#' 
#' @references Juha M. Alho (1990), Logistic Regression in
#' Capture-Recapture Models, \emph{Biometrics}, \bold{46}(3), pp. 623-635
#'
#' @section TODO:
#' This program is the initial implementation (without any code
#' optimization).
#' 
#' @export
a90logit <- function(formula, data, nlists=2, tol=1e-6, max.iter=1e2){

  stopifnot(!missing(formula), !missing(data))
  stopifnot(is.numeric(nlists), length(nlists)==1, is.numeric(tol), length(tol)==1, is.numeric(max.iter), length(max.iter)==1)
  
  if(missing(data)) data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "na.action", "weights", "offset"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE

  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())

  mt <- attr(mf, "terms")
  Y <- model.response(mf, "any")
  X <- model.matrix(mt, mf)

  n <- nrow(Y)
  p <- ncol(X)

  XX <- rbind(cbind(X, matrix(0,nrow=n,ncol=p)), cbind(matrix(0,nrow=n,ncol=p),X))
                        
  y <- c(Y)


  tmat <- matrix(c(1,0, 0,1, 1,1), nrow=3, ncol=2, byrow=TRUE);

  u12 <- u1 <- u2 <- rep(0, n)
  u12[which(Y[,1]==1 & Y[,2]==1)] <- 1
  u1[which(Y[,1]==1 & u12!=1)] <- 1
  u2[which(Y[,2]==1 & u12!=1)] <- 1
  mmat <- cbind(u1, u2, u12)
  
  theta <- numeric(nlists*p)
  Xa <- matrix(0, nrow=n, ncol=nlists)
  niters <- 1
  diff <- 1
  converged <- FALSE

  while(!converged){
                
    Xa <- matrix( XX%*%theta, ncol=nlists, nrow=n)
                lp <- exp(Xa)/(1+exp(Xa))
                phi <- lp[,1]+lp[,2]-lp[,1]*lp[,2]

    EL <- t(apply(Xa, 1, function(x) exp(tmat%*%x)/sum(exp(tmat%*%x))) )
    E1 <- apply(EL, 1, function(x) sum(x[tmat[,1]==1]))
    E2 <- apply(EL, 1, function(x) sum(x[tmat[,2]==1]))
    Ey <- c(E1, E2)

    wd1 <- mapply(function(x,y) x/y - (x/y)^2, lp[,1], phi)
    wd2 <- mapply(function(x,y) x/y - (x/y)^2, lp[,2], phi)
    W <- diag(c(wd1, wd2))
    for(i in seq_len(n)) W[i+n, i] <- W[i, i+n] <- lp[i,2]*lp[i,1]/phi[i] - lp[i,2]*lp[i,1]/(phi[i]^2)

    score <- t(XX) %*% (y-Ey)
    invcov <- t(XX) %*% W %*% XX
    new.theta <- theta + solve(invcov, score)

    distance <- sqrt(sum((new.theta - theta)^2))
    theta <- new.theta
    niters <- niters + 1
    converged <- (distance < tol)
        }

  lik.tmp <- t(apply(Xa, 1, function(x) exp(tmat%*%x)/sum(exp(tmat%*%x))) )
  llik <- sum(log(lik.tmp[mmat==1]))
  vcv <- solve(invcov)

  rvals <- list(cfs=theta, vcv=vcv, y=y, X=X, converged=converged,
  niters=niters, llik=llik, n=n, p=p, tmat=tmat, phi=phi, Xa=Xa,
  XX=XX, formula=formula) 
  
  class(rvals) <- c("a90logit")
  return(rvals)
}
NULL