#' Use optimization to estimate generalized linear models with arbitrary loss function
#'
#' @param formula an object of class \code{\link{"formula"}} (or one that can be coerced
#' to that class): a symbolic description of the model to be fitted.
#' @param data an optional data frame, list or environment (or object coercible by
#' \code{\link{as.data.frame}} to a data frame) containing the variables
#' in the model. If not found in data, the variables are taken from
#' \code{environment(formula)}, typically the environment from which
#' \code{optimLm} is called.
#' @param loss the loss function taking three arguments: target values \code{x},
#' predictions \code{y} and weights \code{w} (see \code{\link{loss}}
#' for details).
#' @param invlink inverse link function (see \code{\link[stats]{family}}).
#' @param subset an optional vector specifying a subset of observations to be used in
#' the fitting process.
#' @param weights an optional vector of 'prior weights' to be used in the fitting process.
#' Should be \code{NULL} or a numeric vector.
#' @param na.action a function which indicates what should happen when the data contain
#' \code{NA}s. The default is set by the \code{na.action} setting of
#' options, and is \code{\link[stats]{na.fail}} if that is unset.
#' The 'factory-fresh' default is \code{\link[stats]{na.omit}}.
#' Another possible value is \code{NULL}, no action. Value
#' \code{\link[stats]{na.exclude}} can be useful.
#' @param start starting values for the parameters in the linear predictor.
#' @param \dots additional parameters passed to \code{\link[stats]{optim}}.
#'
#' @details
#'
#' \code{optimLm} is a wrapper around \code{\link[stats]{optim}} function that can be
#' used to estimate the parameters of generalized linear model
#'
#' \deqn{
#' \hat\beta = \mathrm{arg\,min}_{\beta} \, \left\{\; L(Y, \, g^{-1}(X\beta)) \;\right\}
#' }{
#' \beta = argmin{ loss(Y, invlink(X\beta)) }
#' }
#'
#' where \eqn{g^{-1}}{invlink} is an inverse link function (see \code{\link[stats]{family}})
#' and \eqn{L}{loss} is a user-specified loss function.
#'
#' @examples
#'
#' myloss <- function(x, y, w) {
#' r <- x-y
#' out <- numeric(length(r))
#' out[r<0] <- r[r<0]^2
#' out[r>=0] <- abs(r[r>=0])
#' sum(w * out)
#' }
#'
#' optimLm(mpg ~ ., data = mtcars, loss = myloss)
#' optimLm(mpg ~ ., data = mtcars, loss = huberloss(epsilon = 4))
#' optimLm(mpg ~ ., data = mtcars, loss = thresholdloss(epsilon = 4))
#'
#' @export
optimLm <- function(formula, data, loss = loss("l2"), invlink = identity,
..., subset, weights, na.action, start = NULL) {
Call <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "na.action"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$method <- mf$control <- mf$model <- mf$x <- mf$y <- mf$contrasts <- NULL
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
Terms <- attr(mf, "terms")
y <- model.response(mf, "numeric")
X <- model.matrix(Terms, mf, contrasts)
ynames <- names(y)
xnames <- dimnames(X)[[2L]]
dx <- dim(X)
n <- dx[1L]
if (!missing(weights) && !is.numeric(weights))
stop("weights must be a numeric vector")
if (missing(weights))
weights <- rep(1, n)
if (any(weights < 0))
stop("weights need to be non-negative")
if (is.null(start))
start <- lsfit(X, y, intercept = FALSE)$coefficients
optfn <- function(beta) loss(y, invlink(drop(X %*% beta)), weights)
fit <- optim(start, fn = optfn, hessian = TRUE, ...)
yhat <- invlink(drop(X %*% fit$par))
structure(
list(call = Call,
dims = dx,
coefficients = fit$par,
fitted.values = yhat,
residuals = y - yhat,
weights = weights,
invlink = invlink,
loss = loss,
optim = list(start = start,
output = fit,
control = list(...)),
converged = fit$convergence == 0L),
class = "optimLm"
)
}
#' @export
print.optimLm <- function(x, digits = max(3L, getOption("digits") - 3L), ...) {
cat("Call:\n")
print(x$call)
cat("\nCoefficients:\n ")
print(format(round(x$coefficients, digits = digits)), quote = F, ...)
nobs <- x$dims[1]
rdf <- nobs - x$dims[2]
cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual")
if (!x$converged)
warning("Algorithm did not converge.")
}
#' @export
fitted.optimLm <- function(object, ...) {
object$fitted.values
}
#' @export
predict.optimLm <- function(object, newdata, type = c("response", "link"),
na.action = na.pass, ...) {
type <- match.arg(type)
if (missing(newdata) || is.null(newdata))
return(fitted(object))
tt <- terms(object)
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata, na.action = na.action, xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
eta <- drop(X %*% coef(object))
if (type == "response")
return(object$invlink(eta))
else
return(eta)
}
#' @export
plot.optimLm <- function(x, ..., main = NULL,
xlab = "Fitted", ylab = "Residuals") {
plot(x$fitted, x$residuals, ..., main = main, xlab = xlab, ylab = ylab)
abline(h = 0, lty = 2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.