Nothing
#' FLAC - Firth's logistic regression with added covariate
#'
#' \code{flac} implements Firth's bias-reduced penalized-likelihood logistic regression with added covariate.
#'
#' FLAC is a simple modification of Firth's logistic regression which provides average predicted
#' probabilities equal to the observed proportion of events, while preserving the ability to deal
#' with separation. It has been described by Puhr et al (2017).
#'
#' The modified score equations to estimate coefficients for Firth's logistic regression can be
#' interpreted as score equations for ML estimates for an augmented data set. This data set can be
#' created by complementing each original observation i with two pseudo-observations weighted by
#' \eqn{h_i/2} with unchanged covariate values and with response values set to \eqn{y=0} and \eqn{y=1}
#' respectively. The basic idea of FLAC is to discriminate between original and pseudo-observations
#' in the alternative formulation of Firth's estimation as an iterative data augmentation procedure.
#' The following generic methods are available for ' \code{flac}'s output object: \code{print, summary, coef, confint, anova, extractAIC, add1, drop1,
#' profile, terms, nobs, predict}. Furthermore, forward and backward functions perform convenient variable selection. Note
#' that anova, extractAIC, add1, drop1, forward and backward are based on penalized likelihood
#' ratio tests.
#' @encoding UTF-8
#'
#' @param formula A formula object, with the response on the left of the operator,
#' and the model terms on the right. The response must be a vector with 0 and 1 or \code{FALSE} and
#' \code{TRUE} for the outcome, where the higher value (1 or \code{TRUE}) is modeled.
#' @param data A data frame containing the variables in the model.
#' @param lfobject A fitted \code{\link{logistf}} object.
#' @param model If TRUE the corresponding components of the fit are returned.
#' @param control Controls iteration parameter. Taken from \code{logistf}-object when specified. Otherwise default is \code{control= logistf.control()}.
#' @param modcontrol Controls additional parameter for fitting. Taken from \code{logistf}-object when specified. Otherwise default is \code{logistf.mod.control()}.
#' @param weights specifies case weights. Each line of the input data set is multiplied
#' by the corresponding element of weights
#' @param na.action a function which indicates what should happen when the data contain NAs
#' @param offset a priori known component to be included in the linear predictor
#' @param pl Specifies if confidence intervals and tests should be based on the profile
#' penalized log likelihood (\code{pl=TRUE}, the default) or on the Wald method (\code{pl=FALSE}).
#' @param plconf specifies the variables (as vector of their indices) for which profile likelihood
#' confidence intervals should be computed. Default is to compute for all variables.
#' @param ... Further arguments passed to the method or \code{\link{logistf}}-call.
#'
#' @return A \code{flac} object with components:
#' \item{coefficients}{The coefficients of the parameter in the fitted model.}
#' \item{predict}{A vector with the predicted probability of each observation}
#' \item{linear.predictors}{A vector with the linear predictor of each observation.}
#' \item{prob}{The p-values of the specific parameters}
#' \item{ci.lower}{The lower confidence limits of the parameter.}
#' \item{ci.upper}{The upper confidence limits of the parameter.}
#' \item{call}{The call object.}
#' \item{alpha}{The significance level: 0.95}
#' \item{var}{The variance-covariance-matrix of the parameters.}
#' \item{loglik}{A vector of the (penalized) log-likelihood of the restricted and the full models.}
#' \item{n}{The number of observations.}
#' \item{formula}{The formula object.}
#' \item{augmented.data}{The augmented dataset used}
#' \item{df}{The number of degrees of freedom in the model.}
#' \item{method}{depending on the fitting method 'Penalized ML' or `Standard ML'.}
#' \item{method.ci}{the method in calculating the confidence intervals, i.e. `profile likelihood' or `Wald', depending on the argument pl and plconf.}
#' \item{control}{a copy of the control parameters.}
#' \item{modcontrol}{a copy of the modcontrol parameters.}
#' \item{terms}{the model terms (column names of design matrix).}
#' \item{model}{if requested (the default), the model frame used.}
#'
#' @export
#'
#' @examples
#' #With formula and data:
#' data(sex2)
#' flac(case ~ age + oc + vic + vicl + vis + dia, sex2)
#'
#' #With a logistf object:
#' lf <- logistf(formula = case ~ age + oc + vic + vicl + vis + dia, data = sex2)
#' flac(lf, data=sex2)
#'
#' @references Puhr R, Heinze G, Nold M, Lusa L, Geroldinger A (2017). Firth's logistic regression with rare events:
#' accurate effect estimates and predictions? Statistics in Medicine 36: 2302-2317.
#'
#' @seealso [logistf()] for Firth's bias-Reduced penalized-likelihood logistic regression.
#' @importFrom stats formula
#' @rdname flac
#' @export flac
flac <- function(...){
UseMethod("flac")
}
#' @method flac default
#' @exportS3Method flac default
#' @describeIn flac With formula and data
flac.default <- function(formula, data, model=TRUE, control, modcontrol, weights, offset, na.action, pl=TRUE, plconf=NULL,...){
extras <- list(...)
if(missing(control)){
control <- logistf.control()
}
if(missing(modcontrol)){
modcontrol <- logistf.mod.control()
}
mf <- match.call(expand.dots =FALSE)
m <- match(c("formula","data","weights","na.action","offset"), names(mf), 0L)
mf <- mf[c(1, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
f <- substitute(logistf(formula, data=data, pl=F, control = control, modcontrol = modcontrol,
weights = weights, offset = offset, na.action = na.action,...))
temp.fit1 <- eval(f)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf)
n <- length(y)
x <- model.matrix(mt, mf)
weights <- model.weights(mf)
offset <- as.vector(model.offset(mf))
if (is.null(offset)) offset<-rep(0,n)
if (is.null(weights)) weights<-rep(1,n)
response <- formula.tools::lhs.vars(formula)
#apply firths logistic regression and calculate diagonal elements h_i of hat matrix
#and construct augmented dataset and definition of indicator variable g
temp.pseudo <- c(rep(0,length(y)), rep(1,2*length(y)))
temp.neww <- c(weights*rep(1,length(y)), temp.fit1$hat*temp.fit1$modcontrol$tau, temp.fit1$hat*temp.fit1$modcontrol$tau)
newdat <- data.frame(rbind(x[,-1], x[,-1], x[,-1]), newresp = c(y,y,1-y), temp.pseudo=temp.pseudo, temp.neww=temp.neww)
#ML estimation on augmented dataset
temp.fit2 <- logistf(newresp ~.-temp.neww,data=newdat, weights=temp.neww, firth=FALSE, control = control, modcontrol = modcontrol, pl=pl, ...)
temp.fit3 <- logistf(newresp ~ temp.pseudo,data=newdat, weights=temp.neww, firth=FALSE, pl = FALSE, ...)
#outputs
coefficients <- temp.fit2$coefficients[which("temp.pseudo"!=names(temp.fit2$coefficients) & "`(weights)`"!=names(temp.fit2$coefficients))]
names(coefficients) <- names(coef(temp.fit1))
fitted <- temp.fit2$predict[1:length(temp.fit1$y)]
linear.predictors <- temp.fit2$linear.predictors[1:length(y)]
prob <- temp.fit2$prob[which("temp.pseudo"!=names(temp.fit2$prob))]
ci.lower <- temp.fit2$ci.lower[which("temp.pseudo"!=names(temp.fit2$ci.lower))]
ci.upper <- temp.fit2$ci.upper[which("temp.pseudo"!=names(temp.fit2$ci.upper))]
var <- temp.fit2$var
rownames(var) <- colnames(var) <- names(temp.fit2$coefficients)
var <- var[which("temp.pseudo"!=names(temp.fit2$coefficients)), which("temp.pseudo"!=names(temp.fit2$coefficients))]
method.ci <- temp.fit2$method.ci[which("temp.pseudo"!=names(temp.fit2$coefficients))]
res <- list(coefficients=coefficients,
alpha = temp.fit1$alpha,
terms = colnames(x),
var=var,
df = (temp.fit1$df),
loglik = c('full' = unname(temp.fit2$loglik['full']),
'null' = unname(temp.fit3$loglik['full'])),
n=temp.fit1$n,
formula=formula(formula),
call=match.call(),
linear.predictors=linear.predictors,
predict = fitted,
prob=prob,
method = temp.fit2$method,
method.ci = method.ci,
ci.lower=ci.lower,
ci.upper=ci.upper,
control = control,
modcontrol = modcontrol,
augmented.data = newdat)
if(model) res$model <- mf
attr(res, "class") <- c("flac")
res
}
#' @method flac logistf
#' @exportS3Method flac logistf
#' @describeIn flac With logistf object
flac.logistf <- function(lfobject, data, model=TRUE, ... ){
extras <- list(...)
Call <- match.call()
mf <- match.call(expand.dots =FALSE)
m <- match(c("lfobject", "data", "model"), names(mf), 0L)
mf <- mf[c(1, m)]
lfobject <- eval(mf$lfobject, parent.frame())
weights <- model.weights(lfobject$model)
offset <- model.offset(lfobject$model)
if (is.null(offset)) offset<-rep(0,nrow(data))
if (is.null(weights)) weights<-rep(1,nrow(data))
#to update flac.logistf objects at least with formula
if(!is.null(extras$formula)) lfobject <- update(lfobject, formula. = extras$formula)
modcontrol <- lfobject$modcontrol
control <- lfobject$control
if(lfobject$flic) stop("Please call flac() only on logistf-objects with flic=FALSE")
formula <- lfobject$formula
f <- substitute(flac.default(formula, data=data, model = model, control = control, modcontrol = modcontrol,
weights = weights, offset = offset, ...))
fit <- eval(f)
fit$call <- Call
return(fit)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.