Nothing
# linmod.R: Example S3 linear model.
#
# See www.milbo.org/doc/modguide.pdf.
# This software may be freely used.
linmod <- function(...) UseMethod("linmod")
linmod.default <- function(x = stop("no 'x' argument"),
y = stop("no 'y' argument"),
keep = FALSE,
...)
{
stop.if.dot.arg.used(...)
xmat <- as.matrix(x)
# use name "(Intercept)" here so coef names match linmod.formula
x <- cbind("(Intercept)" = 1, xmat)
fit <- linmod.fit(x, y)
fit$call <- match.call()
if(keep) {
fit$x <- xmat
# save y as a one-column matrix, so can use colname to save response name
colname <- deparse(substitute(y))[1]
colname <- gsub(" ", "", substr(colname, 1, 100)) # strip spaces, truncate
fit$y <- as.matrix(y, ncol = 1)
colnames(fit$y) <- colname
}
fit
}
linmod.formula <- function(formula = stop("no 'formula' argument"),
data = parent.frame(),
keep = FALSE,
...)
{
stop.if.dot.arg.used(...)
if(is.matrix(data)) # allow data to be a matrix
data <- as.data.frame(data) # will create colnames V1 V2 V3 if necessary
# note that na.action=na.pass because we will catch NAs later
# in linmod.fit, for uniformity with linmod.default
mf <- model.frame(formula = formula, data = data, na.action = na.pass)
terms <- attr(mf, "terms")
x <- model.matrix(terms, mf)
y <- model.response(mf)
fit <- linmod.fit(x, y)
fit$call <- match.call()
fit$terms <- terms
fit$xlevels <- .getXlevels(terms, mf) # for use by predict.linmod
if(keep)
fit$data <- data
fit
}
linmod.fit <- function(x = stop("no 'x' argument"),
y = stop("no 'y' argument"),
...)
{
# internal function, not for the casual user
# if model has an intercept, the first col of x must be intercept (all 1s)
stop.if.dot.arg.used(...)
x <- check.linmod.x(x)
y <- check.linmod.y(x, y)
fit <- do.linmod.fit(x, y)
class(fit) <- "linmod"
fit
}
check.linmod.x <- function(x)
{
if(!is.matrix(x))
stop("'x' is not a matrix or could not be converted to a matrix")
if(NROW(x) == 0 || NCOL(x) == 0)
stop("'x' is empty")
if(anyNA(x))
stop("NA in 'x'")
# checking just the first column of x suffices because all columns
# of a matrix have the same type
# we allow is.logical because qr etc. treat logical vars as numeric
if(!is.numeric(x[,1]) && !is.logical(x[,1]))
stop("non-numeric column in 'x'")
# ensure all columns in x are named (needed for names in vcov etc.)
# use the same naming convention as lm (prefix for unnamed cols is "V")
missing.colnames <-
if(is.null(colnames(x))) 1:NCOL(x)
else nchar(colnames(x)) == 0
colnames(x)[missing.colnames] <-
c("(Intercept)",
paste("V", seq_len(NCOL(x) - 1), sep = ""))[missing.colnames]
duplicated <- which(duplicated(colnames(x)))
if(length(duplicated))
stop("column name \"", colnames(x)[duplicated[1]],
"\" in 'x' is duplicated")
x
}
check.linmod.y <- function(x, y)
{
# as.vector(as.matrix(y)) is necessary when y is a data.frame
# (because as.vector alone on a data.frame returns a data.frame)
y <- as.vector(as.matrix(y))
if(length(y) == 0)
stop("'y' is empty")
if(anyNA(y))
stop("NA in 'y'")
if(!is.numeric(y) && !is.logical(y))
stop("'y' is not numeric or logical")
if(length(y) != nrow(x))
stop("nrow(x) is ", nrow(x), " but length(y) is ", length(y))
y
}
do.linmod.fit <- function(x, y)
{
# workhorse function for fitting linear models
# essential processing and sanity checks on x and y are already completed
# x is a numeric matrix, y is a numeric vector
qx <- qr(x) # QR-decomposition of x
if(qx$rank < ncol(x))
stop("'x' is singular (it has ", ncol(x),
" columns but its rank is ", qx$rank, ")\n colnames(x): ",
paste0(colnames(x), collapse=' '))
coef <- solve.qr(qx, y) # compute (x'x)^(-1) x'y
stopifnot(!anyNA(coef)) # NA impossible after rank check above
df.residual <- max(0, nrow(x) - ncol(x)) # degrees of freedom
sigma2 <- sum((y - x %*% coef)^2) / df.residual # variance of residuals
vcov <- sigma2 * chol2inv(qx$qr) # covar mat is sigma^2 * (x'x)^(-1)
fitted.values <- qr.fitted(qx, y)
colnames(vcov) <- rownames(vcov) <- colnames(x)
names(fitted.values) <- rownames(x)
colnames(coef) <- colnames(y)
# returned fields match lm's fields
list(coefficients = coef,
residuals = y - fitted.values,
rank = qx$rank,
fitted.values = fitted.values,
vcov = vcov,
sigma = sqrt(sigma2),
df.residual = df.residual)
}
predict.linmod <- function(object = stop("no 'object' argument"),
newdata = NULL,
type = "response",
...)
{
stopifnot(inherits(object, "linmod"))
stop.if.dot.arg.used(...)
match.arg(type, "response") # the type argument is not yet supported
if(is.null(newdata))
yhat <- fitted(object)
else {
if(NROW(newdata) == 0)
stop("'newdata' is empty") # preempt obscure message later
x <- if(is.null(object$terms)) # model built with linmod.default?
process.newdata(object, newdata)
else # model built with linmod.formula
process.newdata.formula(object, newdata)
# The following tests suffice to catch all illegal input. However
# they aren't ideal in that they don't always direct you to the root
# cause of the problem (i.e. the error messages aren't always optimal).
nvar <- length(object$coefficients) - 1 # nbr vars, -1 for intercept
if(ncol(x) - 1 != nvar)
stop("ncol(newdata) is ", ncol(x) - 1, " but should be ", nvar)
if(anyNA(x))
stop("NA in 'newdata'")
if(!is.numeric(x[,1]) && !is.logical(x[,1]))
stop("non-numeric column in 'newdata' (after processing)")
yhat <- as.vector(do.predict.linmod(object, x))
names(yhat) <- rownames(x)
}
yhat
}
process.newdata <- function(object, newdata)
{
# process newdata for models built with linmod.default
x <- if(is.vector(newdata)) # allow newdata to be a vector
matrix(newdata, ncol = length(object$coefficients) - 1)
else
as.matrix(newdata) # allow newdata to be a data.frame
cbind(1, x) # return data with an intercept column
}
process.newdata.formula <- function(object, newdata)
{
# process newdata for models built with linmod.formula
newdata <- as.data.frame(newdata) # allows newdata to be a matrix
terms <- object$terms
dataClasses <- attr(terms, "dataClasses")
iresp <- attr(terms, "response")
terms <- delete.response(terms)
# na.action=na.pass because we will catch NAs after (for clearer error msg)
# xlevels is needed to convert strings to factor levels, for example:
# a <- linmod(Sepal.Length~Species,data=iris)
# predict(a,newdata=data.frame(Species="setosa"))
mf <- model.frame(terms, newdata, na.action = na.pass, xlev = object$xlevels)
if(anyNA(mf))
stop("NA in 'newdata'")
if(NROW(mf) != NROW(newdata)) {
# Get here when model.frame() issues
# Warning: 'newdata' had M rows but variables found have N rows
# Must stop, else the call to model.matrix() below would silently return bad data.
# If a variable is missing, print its name to help the user.
# TODO This will erroneously identify "sqrt(x)" as a missing var in the
# formula "y ~ sqrt(x)" (because the var is wrapped in a func call).
varnames <- names(dataClasses)
varnames <- varnames[-iresp]
missing <- which(!(varnames %in% colnames(newdata)))
missing.msg <- ""
if(length(missing))
missing.msg <- paste0(" (variable '", varnames[missing[1]],
"' may be missing from newdata)")
stop("newdata has ", NROW(newdata),
" rows but model.frame returned ", NROW(mf), " rows", missing.msg)
}
.checkMFClasses(dataClasses, mf) # check types in newdata match original data
model.matrix(terms, mf)
}
do.predict.linmod <- function(object, x)
{
# workhorse function for linear model predictions
# processing by model.matrix etc. and sanity checks on x already completed
# x is a numeric matrix (if model has intercept, first col of x is all 1s)
x %*% coef(object)
}
summary.linmod <- function(object = stop("no 'object' argument"), ...)
{
stop.if.dot.arg.used(...)
se <- sqrt(diag(object$vcov))
t.value <- coef(object) / se
p.value <-
if(object$df.residual == 0) # avoid warning from pt()
rep_len(0, length.out=length(t.value))
else
2 * pt(-abs(t.value), df = object$df.residual)
coefficients <- cbind(Estimate = coef(object),
StdErr = se,
t.value = t.value,
p.value = p.value)
retval <- list(call = object$call,
coefficients = coefficients)
class(retval) <- "summary.linmod"
retval
}
print.linmod <- function(x = stop("no 'x' argument"), ...)
{
stop.if.dot.arg.used(...)
print.model.call(x)
print(x$coefficients)
invisible(x)
}
print.summary.linmod <- function(x = stop("no 'x' argument"), ...)
{
stop.if.dot.arg.used(...)
print.model.call(x)
print(x$coefficients)
invisible(x)
}
print.model.call <- function(x)
{
cat("Call: ") # print.lm has a newline here, but a space is more compact
# use paste0 to convert vector of strings to single string if necessary
cat(strwrap(paste0(deparse(x$call, control = NULL, nlines = 5),
sep = " ", collapse = " "), exdent = 6), sep = "\n")
cat("\n")
}
# stop.if.dot.arg.used will cause an error message if any args are passed to it.
# We use it to test if any dots arg of the calling function was used, for
# functions that must have a dots arg (to match the generic method) but don't
# actually use the dots. This helps the user catch mistyped or illegal args.
# R version 3.3-0 or higher has a function chkDots which could be used instead.
stop.if.dot.arg.used <- function()
{
NULL
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.