#' Diagnostics for Linear Regression
#'
#' Computes diagnostics for linear regression.
#'
#'
#' @param object the linear regression model object
#' @return A object of class "multReg" having the following components:\cr
#'
#' \item{aovtab}{ the analysis of variance table, using the type II sum of
#' squares. See \bold{Note}. } \item{parmests}{ a summary of \code{object}. }
#' \item{vif}{ a named vector of variance inflation factors. }
#' \item{diagstats}{ a data.frame containing the observed values, predicted
#' values, residuals, standardized residuals, studentized residuals, leverage,
#' Cook's D, and dfits for each observation. } \item{crit.val}{ a named vector
#' of the critical values for leverage, Cook's D, and dfits. See \bold{Note} }
#' \item{flagobs}{ a logical vector indicating which observations exceeded at
#' least one of the critical values. } \item{object}{ the \code{lm} object. }
#' \item{x}{ the model matrix of explanatory variables. }
#'
#' @note The type II sum of squares are calculated according to the principle
#' of marginality, testing each term after all others, except ignoring the
#' term's higher-order relatives. This type sum of squares is useful for
#' assessing the overall marginal effect of each term in the model.\cr
#'
#' The critical values for the test criteria are computed as: leverage,
#' \emph{3p/n}; Cook's D, median quantile for the \emph{F} distribution with
#' \emph{p+1} and \emph{n-p} degrees of freedom; and dfits, the .01 quantile
#' of the \emph{grubbs} distribution for \emph{n} observations time the square
#' root of (p/n), where \emph{p}
#' is the number of parameters estimated in the regression and \emph{n} is the
#' number of observations.
#'
#' Objects of class "multReg" have \code{print} and
#' \code{plot} methods.
#' @seealso \code{\link{lm}}, \code{\link{plot.multReg}},
#' @references Draper, N.R. and Smith, H., 1998, Applied Regression Analysis,
#' (3rd ed.): New York, Wiley, 724 p.\cr
#' @keywords models regression
#' @export multReg
multReg <- function(object) {
# Coding history:
# ????????? AVecchia Original coding for Env. Stats. class
# 2007Apr04 DLLorenz Modified for library
# 2007Jun05 DLLorenz Added span argument for plot function
# 2008Apr21 DLLorenz changed comment from parests to parmests
# 2008Apr24 DLLorenz Bug fix
# 2008Aug14 DLLorenz Added Wooding's test to plot
# 2008Oct20 DLLorenz Added PPCC test for normality
# 2010Nov12 DLLorenz Conversion to R
# 2011Apr26 DLLorenz Added polynomial nonlinearity test to partial plots
# 2011Jun29 DLLorenz Bug fix and name change
# 2011Oct25 DLLorenz Update for package
# 2012Mar22 DLLorenz Bug fix in diagnostic plots for the correlogram
# 2012Aug28 DLLorenz Change from diagPlot to plot
# 2012Dec27 DLLorenz Minor tweaks, including integer indexes
# 2013Jan08 DLLorenz Used Anova in car rather than anova for type II Sum Sq.
# 2013Apr09 DLLorenz Added setGD to plot
# 2013May10 DLLorenz Use na.omit to work with both na.omit and na.exclude in call
# 2013Dec30 DLLorenz Added * to print of observations exceeding test criterion
# 2014May14 DLLorenz Increased criterion for dfits
# 2014Dec22 DLLorenz Roxzygen headers
##
## Required USGS core functions: ppcc.test, vif
## Anova already imported from car by ancovareg
aovtab <- Anova(object) # Anova from car does type II
regsum <- summary(object)
vif <- vif(object)
## Compute the diagnostics using lsfit
## get x matrix
if(is.null(object[['x']])) # to avoid conflict with the xlevels component.
x <- model.matrix(object)[, -1L, drop=FALSE]
else
x <- object$x[, -1L, drop=FALSE]
## get y
if(is.null(object$y))
y <- model.extract(model.frame(object), "response")
else
y <- object$y
respvar <- make.names(object$terms[[2L]])
if(length(respvar) > 1L) # happens when function is used
respvar <- paste(respvar, collapse='.')
## get weights
if(is.null(object$weights))
wt <- rep(1, length(y))
else
wt <- object$weights
## Run lsfit and ls.diag to get the residual diagnostic stats we need:
lsout <- lsfit(x, y, wt)
lsdiag <- ls.diag(lsout)
lev <- lsdiag$hat
cooksd <- lsdiag$cooks
std.res <- lsdiag$std.res
dfits <- lsdiag$dfits
stud.res <- lsdiag$stud.res
## Compute critical values for diagnostic statistics and pick out offending
## observations
p <- ncol(x)+1
n <- nrow(x)
cvlev <- 3*p/n
cvdfit <- qgrubbs(0.01, n)*sqrt(p/n)
cvcook <- qf(.5,p+1,n-p)
pck <- c(lev>cvlev | cooksd>cvcook | abs(dfits)>cvdfit)
## Combine the diagnostic stats into a single data set, round it to 3
## decimals, rename y, and combine the output into a list.
yhat <- object$fitted.values
diagstats <- data.frame(y=y, yhat=yhat, resids=lsout$residuals,
stnd.res=std.res, stud.res=stud.res,
leverage=lev, cooksD=cooksd, dfits=dfits)
names(diagstats)[1L] <- respvar
cvs <- c(cvlev,cvcook,cvdfit)
names(cvs) <- c('leverage','cooksD','dfits')
stuff.out <- list(aovtab=aovtab, parmests=regsum, vif=vif,
diagstats=diagstats, crit.val=cvs, flagobs=pck,
object=object, x=x)
oldClass(stuff.out) <- "multReg"
return(stuff.out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.