R/check_infinite_estimates.R

Defines functions plot.inf_check check_infinite_estimates.glm

Documented in check_infinite_estimates.glm

# Copyright (C) 2016-2020 Ioannis Kosmidis

#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 or 3 of the License
#  (at your option).
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/


#' A simple diagnostic of whether the maximum likelihood estimates are
#' infinite
#'
#' @aliases checkInfiniteEstimates
#'
#' @param object the result of a \code{\link{glm}} call.
#' @param nsteps starting from \code{maxit = 1}, the GLM is refitted
#'     for \code{maxit = 2}, \code{maxit = 3}, \ldots, \code{maxit =
#'     nsteps}. Default value is 30.
#' @param ... currently not used.
#'
#' @return
#'
#' An object of class \code{inf_check} that has a \code{plot} method.
#'
#' @details
#'
#' \code{check_infinite_estimates} attempts to identify the occurence
#' of infinite estimates in GLMs with binomial responses by
#' successively refitting the model. At each iteration the maximum
#' number of allowed IWLS iterations is fixed starting from 1 to
#' \code{nsteps} (by setting \code{control = glm.control(maxit = j)},
#' where \code{j} takes values 1, \ldots, nsteps in
#' \code{\link{glm}}). For each value of \code{maxit}, the estimated
#' asymptotic standard errors are divided to the corresponding ones
#' from \code{control = glm.control(maxit = 1)}. Then, based on the
#' results in Lesaffre & Albert (1989), if the sequence of ratios in
#' any column of the resultant matrix diverges, then complete or
#' quasi-complete separation occurs and the maximum likelihood
#' estimate for the corresponding parameter has value minus or plus
#' infinity.
#'
#' @return
#'
#' A matrix inheriting from class \code{inf_check}, with \code{nsteps}
#' rows and \code{p} columns, where \code{p} is the number of model
#' parameters. A \code{plot} method is provided for \code{inf_check}
#' objects for the easy inspection of the ratios of the standard
#' errors.
#' 
#' @note
#'
#' For the definition of complete and quasi-complete separation, see
#' Albert and Anderson (1984). Kosmidis and Firth (2019) prove that
#' the reduced-bias estimator that results by the penalization of the
#' logistic regression log-likelihood by Jeffreys prior takes always
#' finite values, even when some of the maximum likelihood estimates
#' are infinite. The reduced-bias estimates can be computed using the
#' \pkg{brglm2} R package.
#'
#' @seealso \code{\link[nnet]{multinom}},
#'     \code{\link{detect_separation}},
#'     \code{\link[brglm2]{brmultinom}}
#'
#' @references
#'
#' Lesaffre, E., & Albert, A. (1989). Partial Separation in Logistic
#' Discrimination. *Journal of the Royal Statistical Society. Series B
#' (Methodological)*, **51**, 109-116
#'
#' Kosmidis I. and Firth D. (2019). Jeffreys-prior penalty, finiteness
#' and shrinkage in binomial-response generalized linear
#' models. arXiv:1812.01938.
#' \url{https://arxiv.org/abs/1812.01938v3}
#'
#' @examples
#'
#' ## endometrial data from Heinze \& Schemper (2002) (see ?endometrial)
#' data("endometrial", package = "detectseparation")
#' endometrial_ml <- glm(HG ~ NV + PI + EH, data = endometrial,
#'                       family = binomial("probit"))
#' ## clearly the maximum likelihood estimate for the coefficient of
#' ## NV is infinite
#' (estimates <- check_infinite_estimates(endometrial_ml))
#' plot(estimates)
#' 
#'
#' \donttest{
#' ## Aligator data (Agresti, 2002, Table~7.1)
#' if (requireNamespace("brglm2", quietly = TRUE)) {
#'     data("alligators", package = "brglm2")
#'     all_ml <- brglm2::brmultinom(foodchoice ~ size + lake , weights = round(freq/3),
#'                          data = alligators, type = "ML", ref = 1)
#'     ## Clearly some estimated standard errors diverge as the number of
#'     ## Fisher scoring iterations increases
#'     plot(check_infinite_estimates(all_ml))
#'     ## Bias reduction the brglm2 R packages can be used to get finite estimates
#'     all_br <- brglm2::brmultinom(foodchoice ~ size + lake , weights = round(freq/3),
#'                          data = alligators, ref = 1)
#'     plot(check_infinite_estimates(all_br))
#' }
#' }
#' @export
check_infinite_estimates.glm <- function(object, nsteps = 20, ...) {
    valid_classes <- c("glm", "brglmFit", "brmultinom")
    is_brmultinom <- inherits(object, "brmultinom")       
    if (!inherits(object, valid_classes)) {
        warning("check_infinite_estimates has been designed for objects of class 'glm', 'brglmFit', 'brmultinom'")
    }
    if ((object$family$family != "binomial") & (!is_brmultinom)) {
        warning("check_infinite_estimates has been designed for binomial- or multinomial-response models")
    }
    if (is_brmultinom) {
        betas <- coef(object)
        dims <- dim(betas)
        betasNames <- paste(rep(colnames(betas), dims[1]), rownames(betas), sep = ":")
        betas <- c(betas)
        names(betas) <- betasNames
    }
    else {
        betas <- coef(object)
        betasNames <- names(betas)
    }
    eps <- .Machine$double.eps
    noNA <- !is.na(betas)
    stdErrors <- matrix(0, nsteps, length(betas))
    start <- NULL
    for (i in 1:nsteps) {
        if (is_brmultinom) {
            suppressWarnings(temp.object <- update(object, control = list(maxit = i, epsilon = eps, type = object$type), start = start))
            stdErrors[i, noNA] <- sqrt(diag(vcov(temp.object))[noNA])
        }
        else {
            suppressWarnings(temp.object <- update(object, control = list(maxit = i, epsilon = eps), start = start))
            stdErrors[i, noNA] <- summary(temp.object)$coef[betasNames[noNA], "Std. Error"]
        }
        start <- c(coef(temp.object))
    }
    res <- sweep(stdErrors, 2, stdErrors[1, ], "/")
    colnames(res) <- betasNames
    class(res) <- "inf_check"
    res
}

#' @export
plot.inf_check <- function(x, tol = 1e+2, ...) {
    ## heuristic for determining ploting ranges
    sds <- apply(x, 2, sd)    
    matplot(x, type = "l", lty = 1, ylim = range(x[, sds < tol]) * c(1, 1.5),
            ylab = "estimate", xlab = "number of iterations")
}

Try the detectseparation package in your browser

Any scripts or data that you put into this service are public.

detectseparation documentation built on March 25, 2020, 5:07 p.m.