Nothing
#' Binomial regression
#'
#' A unified interface for binomial regression models, including
#' linear probability, probit and logit models
#'
#' @name binomreg
#' @param formula a symbolic description of the model
#' @param data a data frame,
#' @param subset,weights,na.action,offset,contrasts see `stats::lm`,
#' @param link one of `"identity"`, `"probit"` and "`logit`" to fit
#' respectively the linear probability, the probit and the logit
#' model
#' @param method `"ml"` for maximum likelihood (the only relevant
#' method for a regression without instrumental variables),
#' `"twosteps"` for two-steps estimator, `"minchisq"` for minimum
#' chi-squared estimator and `"test"` to get the exogeneity test,
#' @param start a vector of starting values
#' @param robust only when `method = "twosteps"`, should the robust
#' covariance matrix be computed?
#' @param opt optimization method
#' @param maxit maximum number of iterations
#' @param trace printing of intermediate result
#' @param object,x,type a `binomreg` object and the type of residuals
#' for the `residuals` method
#' @param check_gradient if `TRUE` the numeric gradient and hessian
#' are computed and compared to the analytical gradient and
#' hessian
#' @param ... further arguments
#' @param newdata a new data frame for the `predict` method
#' @return an object of class `c("binomreg", "micsr")`, see
#' `micsr::micsr` for further details
#' @importFrom stats glm plogis qlogis dlogis
#' @importFrom Formula Formula model.part
#' @keywords models
#' @examples
#' pbt <- binomreg(mode ~ cost + ivtime + ovtime, data = mode_choice, link = 'probit')
#' lpm <- binomreg(mode ~ cost + ivtime + ovtime, data = mode_choice, link = 'identity')
#' summary(pbt, vcov = "opg")
#' @export
binomreg <- function(formula, data, weights, subset, na.action, offset, contrasts = NULL,
link = c("identity", "probit", "logit"),
method = c("ml", "twosteps", "minchisq", "test"),
start = NULL,
robust = TRUE,
opt = c("newton", "nr", "bfgs"),
maxit = 100, trace = 0,
check_gradient = FALSE, ...){
.opt <- match.arg(opt)
.method <- match.arg(method)
.call <- match.call()
.link <- match.arg(link)
.linkfun <- switch(.link,
probit = function(mu) qnorm(mu),
logit = function(mu) qlogis(mu))
.linkinv <- switch(.link,
probit = function(mu) pnorm(mu),
logit = function(mu) plogis(mu))
.mu.eta <- switch(.link,
probit = function(mu) dnorm(mu),
logit = function(mu) dlogis(mu))
mf <- match.call(expand.dots = FALSE)
.formula <- Formula(formula)
if (length(.formula)[2] == 2){
mf$model <- "probit"
mf$method <- .method
mf[[1L]] <- as.name("ivldv")#quote(micsr::ivldv())
result <- eval(mf, parent.frame())
result$call <- .call
return(result)
} else {
if (.method != "ml")
stop("with a one-part formula, the only relevant method is ml")
}
m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
names(mf), 0L)
# construct the model frame and components
mf <- mf[c(1L, m)]
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf)
wt <- as.vector(model.weights(mf))
if (is.null(wt)) wt <- 1 else wt <- wt# / mean(wt)
.offset <- model.offset(mf)
X <- model.matrix(mt, mf, contrasts)
if (compute_rank(X) < ncol(X)){
.rank <- compute_rank(X)
.ncol <- ncol(X)
stop(paste("the rank of X = ", .rank, " < the number of columns = ", .ncol, sep = ""))
}
yb <- mean(y)
q <- 2 * y - 1
K <- ncol(X)
N <- length(y)
.df.residual <- N - K
.null_logLik <- N * (yb * log(yb) + (1 - yb) * log(1 - yb))
.sat_logLik <- 0
.null_deviance <- - 2 * .null_logLik
if (.link == "identity"){
.null_intercept <- yb
lnl <- function(coefs, gradient = FALSE, hessian = FALSE, information = FALSE,
opposite = FALSE, sum = TRUE, X, y, weights){
sgn <- ifelse(opposite, - 1, + 1)
K <- ncol(X)
N <- length(y)
beta <- coefs[1:K]
sig <- coefs[K + 1]
linpred <- drop(X %*% beta)
lnl <- dnorm(y, mean = linpred, sd = sig, log = TRUE)
if (gradient){
grad <- cbind((y - linpred) / sig ^ 2 * X,
- 1 / sig + (y - linpred) ^ 2 / sig ^ 3)
}
if (hessian){
hess_bb <- - crossprod(sqrt(weights) * X) / sig ^ 2
hess_ss <- - 2 / sig ^ 2
hess_ss <- 1 / sig ^ 2 * sum(weights) - 3 * sum(weights * (y - linpred) ^ 2) / sig ^ 4
hess <- rbind(cbind(hess_bb, sigma = rep(0, K)),
sigma = c(rep(0, K), hess_ss))
}
if (information){
info_bb <- crossprod(sqrt(weights) * X) / sig ^ 2
info_ss <- 2 / sig ^ 2
info <- rbind(cbind(info_bb, rep(0, K)),
c(rep(0, K), info_ss))
}
if (sum){
lnl <- sum(weights * lnl)
if (gradient) grad <- apply(weights * grad, 2, sum)
}
if (gradient) attr(lnl, "gradient") <- grad * sgn
if (hessian) attr(lnl, "hessian") <- hess * sgn
if (information) attr(lnl, "info") <- info
lnl * sgn
}
}
if (.link == "probit"){
.null_intercept <- qnorm(yb)
lnl <- function(coefs, gradient = FALSE, hessian = FALSE, information = FALSE,
opposite = FALSE, sum = TRUE, X, y, weights){
sgn <- ifelse(opposite, +1, -1)
linpred <- drop(X %*% coefs)
q <- 2 * y - 1
lnl <- pnorm(q * linpred, log.p = TRUE)
if (gradient) grad <- q * mills(q * linpred) * X
if (hessian) hess <- - crossprod(sqrt(- weights * mills(q * linpred, 1)) * X)
if (information) info <- crossprod(sqrt(weights * mills(linpred) * mills(- linpred)) * X)
if (sum){
lnl <- sum(lnl * weights)
if (gradient) grad <- apply(grad * weights, 2, sum)
}
if (gradient) attr(lnl, "gradient") <- grad * sgn
if (hessian) attr(lnl, "hessian") <- hess * sgn
if (information) attr(lnl, "info") <- info
lnl * sgn
}
lnl <- function(coefs, gradient = FALSE, hessian = FALSE, information = FALSE,
opposite = FALSE, sum = TRUE, X, y, weights){
sgn <- ifelse(opposite, - 1, + 1)
linpred <- drop(X %*% coefs)
q <- 2 * y - 1
lnl <- pnorm(q * linpred, log.p = TRUE)
if (gradient) grad <- q * mills(q * linpred) * X
if (hessian) hess <- - crossprod(sqrt(- weights * mills(q * linpred, 1)) * X)
if (information) info <- crossprod(sqrt(weights * mills(linpred) * mills(- linpred)) * X)
if (sum){
lnl <- sum(lnl * weights)
if (gradient) grad <- apply(grad * weights, 2, sum)
}
if (gradient) attr(lnl, "gradient") <- grad * sgn
if (hessian) attr(lnl, "hessian") <- hess * sgn
if (information) attr(lnl, "info") <- info
lnl * sgn
}
}
if (.link == "logit"){
.null_intercept <- qlogis(yb)
lnl <- function(coefs, gradient = FALSE, hessian = FALSE, information = FALSE,
opposite = FALSE, sum = TRUE, X, y, weights){
sgn <- ifelse(opposite, - 1, + 1)
linpred <- drop(X %*% coefs)
q <- 2 * y - 1
lnl <- plogis(q * linpred, log.p = TRUE)
if (gradient) grad <- (y - exp(linpred) / (1 + exp(linpred))) * X
if (hessian) hess <- - crossprod(sqrt(weights * exp(linpred) / (1 + exp(linpred)) ^ 2) * X)
if (information) info <- crossprod(sqrt(weights * exp(linpred) / (1 + exp(linpred)) ^ 2) * X)
if (sum){
lnl <- sum(lnl * weights)
if (gradient) grad <- apply(grad * weights, 2, sum)
}
if (gradient) attr(lnl, "gradient") <- grad * sgn
if (hessian) attr(lnl, "hessian") <- hess * sgn
if (information) attr(lnl, "info") <- info
lnl * sgn
}
}
## if (is.null(start)){
## start <- rep(0, K)
## names(start) <- colnames(X)
## if (.link != "identity"){
## .coefs <- newton(lnl, X = X, y = y, weights = wt,
## trace = 0, coefs = start, direction = "max")
## }
## else{
## .coefs <- drop(solve(crossprod(sqrt(wt) * X), crossprod(wt * X, y)))
## .sigma <- sqrt(mean(wt * (y - drop(X %*% .coefs) ^ 2)))
## .coefs <- c(.coefs, sigma = .sigma)
## }
## }
## else .coefs <- start
if (is.null(start)){
start <- rep(0, K)
}
names(start) <- colnames(X)
if (maxit > 0){
if (.link != "identity"){
## .coefs <- newton(lnl, X = X, y = y, weights = wt,
## trace = 1, coefs = start, direction = "max")
## stop()
.coefs <- maximize(lnl, start = start, method = .opt, trace = trace, maxit = maxit,
X = X, y = y, weights = wt, ...)
}
else{
.coefs <- drop(solve(crossprod(sqrt(wt) * X), crossprod(wt * X, y)))
.sigma <- sqrt(mean(wt * (y - drop(X %*% .coefs) ^ 2)))
.coefs <- c(.coefs, sigma = .sigma)
}
}
else .coefs <- start
if (.link != "identity") .linpred <- drop(X %*% .coefs)
else .linpred <- drop(X %*% .coefs[- (ncol(X) + 1)])
.lnl_conv <- lnl(.coefs, X = X, y = y, weights = wt, gradient = TRUE,
hessian = TRUE, info = TRUE, sum = FALSE)
fun <- function(x) lnl(x, X = X, y = y, weights = wt, gradient = TRUE,
hessian = TRUE, info = TRUE, sum = TRUE)
if (check_gradient) z <- check_gradient(fun, .coefs) else z <- NA
l_model <- as.numeric(.lnl_conv)
l_saturated <- 0
l_null <- yb * log(yb) + (1 - yb) * log(1 - yb)
values <- cbind(model = l_model, saturated = l_saturated, null = l_null)
.logLik <- apply(values * wt, 2, sum)
if (.link == "logit") .fitted <- plogis(.linpred)
if (.link == "probit") .fitted <- pnorm(.linpred)
if (.link == "identity") .fitted <- .linpred
.npar <- c(covariates = K)
if (.link == "identity") .npar <- c(covariates = K, vcov = 1)
attr(.npar, "default") <- "covariates"
# Null model
null_coefs <- rep(0, ncol(X))
names(null_coefs) <- colnames(X)
null_coefs["(Intercept)"] <- .null_intercept
if (.link == "identity") null_coefs <- c(null_coefs, sigma = sqrt(mean((y - mean(y)) ^ 2)))
lnl_null <- lnl(null_coefs, gradient = TRUE, hessian = TRUE, information = TRUE,
X = X, y = y, weights = wt)
.null_gradient <- attr(lnl_null, "gradient")
.null_info <- attr(lnl_null, "info")
.lm <- drop(crossprod(.null_gradient, solve(.null_info, .null_gradient)))
.model_info <- attr(.lnl_conv, "info")
.vcov <- solve(.model_info)
.wald <- drop(crossprod(.coefs[- 1],
t(crossprod(.coefs[-1],
solve(.vcov[- 1, - 1, drop = FALSE])))))
.lr <- 2 * unname(.logLik["model"] - .logLik["null"])
tests <- c(wald = .wald, score = .lm, lr = .lr)
gres <- (y - pnorm(.linpred)) * dnorm(.linpred) / (pnorm(.linpred) * (1 - pnorm(.linpred)))
.family <- structure(list(
link = .link,
linkfun = .linkfun,
linkinv = .linkinv,
mu.eta = .mu.eta,
variance = function(mu) mu * (1 - mu),
family = "binomial"),
class = "family")
result <- list(coefficients = .coefs,
model = mf,
terms = mt,
value = values,
gradient = attr(.lnl_conv, "gradient"),
hessian = attr(.lnl_conv, "hessian"),
info = attr(.lnl_conv, "info"),
fitted.values = .fitted,
residuals = gres,
linear.predictors = .linpred,
logLik = .logLik,
tests = tests,
df.residual = .df.residual,
npar = .npar,
est_method = "ml",
call = .call,
na.action = attr(mf, "na.action"),
weights = wt,
offset = .offset,
contrasts = attr(X, "contrasts"),
xlevels = .getXlevels(mt, mf),
check_gradient = z,
family = .family)
structure(result, class = c("binomreg", "micsr"))
}
#' @rdname binomreg
#' @method glance binomreg
#' @export
glance.binomreg <- function(x, ...){
.est_method <- x$est_method
N <- nobs(x)
result <- data.frame(nobs = nobs(x))
if (.est_method == "ml"){
result <- data.frame(null.deviance = deviance(x, type = "null"),
df.null = nobs(x) - 1,
logLik = logLik(x),
AIC = AIC(x),
BIC = BIC(x),
deviance = deviance(x, type = "model"),
df.residual = df.residual(x),
nobs = nobs(x))
}
# if (.est_method == "twosteps"){
# result <- data.frame(nobs = nobs(x), impliedsigma = x$sigma)
# }
result
}
#' @rdname binomreg
#' @export
predict.binomreg <- function(object, ..., type = c("response", "link"), newdata = NULL){
.type <- match.arg(type)
.link <- object$call$link
if (is.null(newdata)){
if (.type == "response") result <- object$fitted
if (.type == "link") result <- object$linear.predictors
}
else{
.formula <- Formula(object$formula)
mf <- model.frame(.formula, newdata, dot = "previous")
X <- model.matrix(.formula, mf, rhs = 1)
y <- model.part(.formula, newdata, lhs = 1, drop = TRUE)
result <- drop(X %*% coef(object))
cum_fun <- switch(.link,
"logit" = plogis,
"probit" = pnorm,
"lm" = function(x) x)
if (.type == "response") result <- cum_fun(result)
}
result
}
# ajout dans le fichier type_dictionary.R
# ajout du fichier methods_binomreg
# sanity_model.R ajouter dans la liste des modèles supportés
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.