# R/detect_infinite_estimates.R In detectseparation: Detect and Check for Separation and Infinite Maximum Likelihood Estimates

#### Documented in detect_infinite_estimates

```# Copyright (C) 2022- Florian Schwendinger, Ioannis Kosmidis

#  This program is free software; you can redistribute it and/or modify
#  the Free Software Foundation; either version 2 or 3 of the License
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at

#' @title Detect Infinite Estimates
#'
#' @description
#'
#' Method for \code{\link{glm}} that detects infinite components in
#' the maximum likelihood estimates of generalized linear models
#' with binomial responses.
#'
#' @inheritParams stats::glm.fit
#'
#' @aliases detectInfiniteEstimates
#'
#' @param x \code{x} is a design matrix of dimension \code{n * p}.
#' @param y \code{y} is a vector of observations of length \code{n}.
#' @param control a list of parameters controlling separation
#'     details.
#' @param start currently not used.
#' @param mustart currently not used.
#' @param etastart currently not used.
#' @param singular.ok logical. If \code{FALSE}, a singular model is an
#'     error.
#'
#'
#' @details
#'
#' For binomial-response generalized linear models with \code{"log"}
#' infinite maximum likelihood estimates. For this reason, for models
#' \code{\link{detect_infinite_estimates}()} relies on an alternative
#' linear optimization model developed in Schwendinger et al. (2021),
#' and for all the other supported links it relies on the linear
#' programming methods developed in Konis (2007). See
#' \code{\link{detect_separation}()} for definitions and details.
#'
#' \code{\link{detect_infinite_estimates}()} is a wrapper to the
#' functions \code{separator_ROI()}, \code{separator_lpSolveAPI()} (a
#' modified version of the \code{separator()} function from the
#' **safeBinaryRegression** R package), and \code{dielb_ROI()}.
#'
#' The \code{\link{coefficients}()} method extracts a vector of values
#' for each of the model parameters under the following convention:
#' \code{0} if the maximum likelihood estimate of the parameter is
#' finite, and \code{Inf} or \code{-Inf} if the maximum likelihood
#' estimate of the parameter if plus or minus infinity. This
#' convention makes it easy to adjust the maximum likelihood estimates
#' to their actual values by element-wise addition.
#'
#' \code{\link{detect_infinite_estimates}()} can be passed directly as
#' a method to the \code{\link{glm}} function. See, examples.
#'
#' \code{detectInfiniteEstimates}() is an alias for
#' \code{detect_infinite_estimates}().
#'
#' @author Ioannis Kosmidis [aut, cre] \email{ioannis.kosmidis@warwick.ac.uk}, Florian Schwendinger [aut] \email{FlorianSchwendinger@gmx.at}, Dirk Schumacher [aut] \email{mail@dirk-schumacher.net}, Kjell Konis [ctb] \email{kjell.konis@me.com}
#'
#'
#' @references
#'
#' Silvapulle, M. J. (1981).
#' On the Existence of Maximum Likelihood Estimators for the Binomial Response Models.
#' Journal of the Royal Statistical Society. Series B (Methodological), 43(3), 310–313.
#' \url{https://www.jstor.org/stable/2984941}
#'
#' Konis K. (2007). *Linear Programming Algorithms for Detecting
#' Separated Data in Binary Logistic Regression
#' Models*. DPhil. University of Oxford.
#' \url{https://ora.ox.ac.uk/objects/uuid:8f9ee0d0-d78e-4101-9ab4-f9cbceed2a2a}
#'
#' Konis K. (2013). safeBinaryRegression: Safe Binary Regression. R
#' package version 0.1-3.
#' \url{https://CRAN.R-project.org/package=safeBinaryRegression}
#'
#' Kosmidis I. and Firth D. (2021). Jeffreys-prior penalty, finiteness
#' and shrinkage in binomial-response generalized linear
#' models. *Biometrika*, **108**, 71–82. \doi{10.1093/biomet/asaa052}
#'
#' Schwendinger, F., Grün, B. & Hornik, K. (2021). A comparison of
#' optimization solvers for log binomial regression including conic
#' programming.  *Computational Statistics*, **36**,
#' 1721–1754. \doi{10.1007/s00180-021-01084-5}
#'
#'
#' @examples
#' # The classical example given in Silvapulle (1981) can be utilized
#' # to show that for the Log-Binomial model there exist data allocations
#' # which are separated but produce finite estimates.
#' data("silvapulle1981", package = "detectseparation")
#'
#' # Since the data is separated the MLE does not exist for the logit link.
#' glm(y ~ ghqs, data = silvapulle1981, family = binomial(),
#'     method = "detect_infinite_estimates")
#'
#' # However, for the log link all components of the MLE are finite.
#' glm(y ~ ghqs, data = silvapulle1981, family = binomial("log"),
#'     method = "detect_infinite_estimates")
#' glm(y ~ ghqs, data = silvapulle1981, family = binomial("log"), start = c(-1, 0))
#'
#' @export
detect_infinite_estimates <- function(x, y, weights = NULL,
start = NULL, etastart = NULL,  mustart = NULL,
offset = NULL, family = gaussian(),
control = list(), intercept = TRUE, singular.ok = TRUE) {
if (isTRUE(family\$family != "binomial")) {
warning("`detect_infinite_estimates` has been developed for use with binomial-response GLMs")
}
nobs <- NROW(y)
if (is.null(weights)) {
weights <- rep.int(1, nobs)
}
if (is.null(offset)) {
offset <- rep.int(0, nobs)
}
out <- .detect_infinite_estimates(x = x, y = y, weights = weights, start = start,
etastart = etastart,  mustart = mustart,
offset = offset, family = family, control = control,
intercept = control, singular.ok = singular.ok,
class(out) <- out\$class <- "detect_infinite_estimates"
out
}

#' @method print detect_infinite_estimates
#' @export
print.detect_infinite_estimates <- function(x, digits = max(5L, getOption("digits") - 3L), ...) {
cat("Implementation:", x\$control\$implementation, "| ")
if (identical(x\$control\$implementation, "ROI")) {
cat("Solver:", x\$control\$solver, "\n")
}
else {
cat("Linear program:", x\$control\$linear_program, "| Purpose:", x\$control\$purpose, "\n")
}
cat("Infinite estimates:", x\$outcome, "\n")
if (!is.null(x\$coefficients)) {
cat("Existence of maximum likelihood estimates\n")
print(coefficients(x))
cat("0: finite value, Inf: infinity, -Inf: -infinity\n")
}
}

## Workhorse function
.detect_infinite_estimates <- function(x, y, weights = NULL,
start = NULL, etastart = NULL,  mustart = NULL,
offset = NULL, family = gaussian(),
control = list(), intercept = TRUE, singular.ok = TRUE,
control <- do.call("detect_separation_control", control)
lp <- if (log_link) dielb_ROI else control\$separator
# ensure x is a matrix
x <- as.matrix(x)
betas_names_all <- betas_names <- if (is.null(colnames(x))) make.names(seq_len(NCOL(x))) else colnames(x)
#
nobs <- NROW(y)
nvars <- ncol(x)
if (nvars == 0) {
return(list(outcome = FALSE, control = control))
}
if (is.null(weights)) {
weights <- rep.int(1, nobs)
}
if (missingOffset <- is.null(offset)) {
offset <- rep.int(0, nobs)
}
# Initialize as prescribed in family
eval(family\$initialize)
if (control\$solver == "alabama" & is.null(control\$solver_control\$start)) {
control\$solver_control\$start <- rep(0, nvars)
}
# Detect aliasing
qrx <- qr(x)
rank <- qrx\$rank
is_full_rank <- rank == nvars
if (!singular.ok && !is_full_rank) {
stop("singular fit encountered")
}
if (!isTRUE(is_full_rank)) {
aliased <- qrx\$pivot[seq.int(qrx\$rank + 1, nvars)]
X_all <- x
x <- x[, -aliased]
betas_names <- betas_names[-aliased]
}
betas_all <- structure(rep(NA_real_, length(betas_names_all)), .Names = betas_names_all)
# Observations with zero weight do not enter calculations so ignore
keep <- weights > 0
x <- x[keep, , drop = FALSE]
y <- y[keep]
# Reshape data set: keep 0 and 1, and replace anything in (0,
# 1) with one zero and one 1
ones <- y == 1
zeros <- y == 0
non_boundary <- !(ones | zeros)
x <- x[c(which(ones), which(zeros), rep(which(non_boundary), 2)), , drop = FALSE]
y <- c(y[ones], y[zeros], rep(c(0., 1.), each = sum(non_boundary)))
# Run linear program
out <- lp(x = x, y = y,
linear_program = control\$linear_program,
purpose = control\$purpose,
tolerance = control\$tolerance,
solver = control\$solver,
solver_control = control\$solver_control)
if (is.na(out\$outcome)) {
if (identical(control\$implementation, "ROI")) {
warning("unexpected result from implementation ", control\$implementation, " with solver: ", control\$solver, "\n")
}
if (identical(control\$implementation, "lsSolveAPI") & !log_link) {
warning("unexpected result from implementation ", control\$implementation, " with linear_program: ", control\$linear_program, " and purpose: ", control\$purpose, "\n")
}
}
if (is.null(out\$beta)) {
betas_all <- NULL
}
else {
betas <- out\$beta
names(betas) <- betas_names
inds <- abs(betas) < control\$tolerance
betas <- Inf * betas
betas[inds] <- 0
betas_all[betas_names] <- betas[betas_names]
}
out <- list(x = x,
y = y,
coefficients = betas_all,
outcome = out\$outcome,
control = control)
return(out)
}
```

## Try the detectseparation package in your browser

Any scripts or data that you put into this service are public.

detectseparation documentation built on Aug. 27, 2022, 1:08 a.m.