Nothing
##
## File: mcsSubset.R
##
################
## GENERATORS ##
################
## standard formula interface
##
## Args:
## formula - (formula)
## ... - forwarded to 'lm' and 'mcsSubset.lm'
## lm - (logical) if 'true', compute 'lm' component
##
## Rval: (mcsSubset)
## See 'mcsSubset.default'.
##
## NOTE: 'lm'
## If 'lm' is 'FALSE', the returned 'lm' component is
## an "empty" mockup.
##
mcsSubset.formula <- function (formula, ..., lm = FALSE) {
ret.lm <- lm; lm <- NULL
## keep call (of generic)
call <- match.call()
call[[1]] <- as.name("mcsSubset")
## lm call
lm.call <- match.call(expand.dots = TRUE)
m <- match(c("include", "exclude", "size", "penalty", "tolerance",
"pradius", "nbest", "lm"), names(lm.call), 0L)
lm.call[[1]] <- as.name("lm")
lm.call[m] <- NULL
## build 'lm' component
if (ret.lm) {
object <- eval(lm.call, parent.frame())
ret.m <- TRUE
ret.x <- TRUE
ret.y <- TRUE
} else {
## mock-up
dots <- list(...)
if (is.null(ret.m <- dots[["model"]])) ret.m <- TRUE
if (is.null(ret.x <- dots[["x"]])) ret.x <- FALSE
if (is.null(ret.y <- dots[["y"]])) ret.y <- FALSE
## model frame
mf.call <- match.call(expand.dots = TRUE)
m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
names(mf.call), 0L)
mf.call <- mf.call[c(1L, m)]
mf.call$drop.unused.levels <- TRUE
mf.call[[1L]] <- quote(stats::model.frame)
mf <- eval(mf.call, parent.frame())
## model terms
mt <- attr(mf, "terms")
## model matrix and response
x <- model.matrix(mt, mf, dots$contrasts)
y <- model.response(mf, "numeric")
## mock fit
qr <- array(rep(NA, NCOL(x)), dim = c(1, NCOL(x)),
dimnames = list(NULL, colnames(x)))
## mock lm
object <- list(call = lm.call)
class(object) <- "lm"
object$rank <- NCOL(x)
object$na.action <- attr(mf, "na.action")
object$offset <- as.vector(model.offset(mf))
object$contrasts <- attr(x, "contrasts")
object$xlevels <- .getXlevels(mt, mf)
object$terms <- mt
object$model <- mf
object$x <- x
object$y <- y
object$qr <- list(qr = qr)
}
## forward call
cl <- match.call(expand.dots = TRUE)
cl[[1L]] <- as.name("mcsSubset")
cl$formula <- cl$y <- cl$lm <- NULL
cl$object <- object
rval <- eval(cl, parent.frame())
## return value
rval$call <- call
if (!ret.m) rval$lm$model <- NULL
if (!ret.x) rval$lm$x <- NULL
if (!ret.y) rval$lm$y <- NULL
if (!ret.lm) rval$lm <- NULL
## done
rval
}
## interface for fitted lm regressions
##
## Args:
## object - (lm)
## ... - forwarded to 'mcsSubset.default'
## penalty - ("AIC"|"BIC"|numeric)
##
## Rval: (mcsSubset)
## See 'mcsSubset.default'.
##
mcsSubset.lm <- function (object, ..., penalty = 0) {
## keep call (of generic)
call <- match.call()
call[[1]] <- as.name("mcsSubset")
## model frame and terms
mf <- model.frame(object)
mt <- terms(object)
if (is.empty.model(mt)) {
stop ("empty model")
}
if (any(attr(mt, "dataClasses") == "factor")) {
stop ("model contains factors")
}
## model matrix and response
x <- model.matrix(object)
y <- model.response(mf)
## model weights and offset
if (is.null(w <- model.weights(mf))) w <- rep(1, NROW(x))
if (is.null(o <- model.offset(mf))) o <- rep(0, NROW(x))
wok <- w != 0; wnz <- w[wok]
x <- sqrt(wnz) * x[wok, , drop = FALSE]
y <- sqrt(wnz) * y[wok, drop = FALSE] - o[wok]
## penalty
if (tolower(penalty) == "bic") {
penalty <- log(sum(wnz))
attr(penalty, "label") <- "BIC"
}
## forward call
rval <- mcsSubset(x, y, penalty = penalty, ...)
## return value
rval$call <- call
rval$.lm <- rval$lm <- object
## done
rval
}
## default method
##
## Args:
## object - (matrix) model matrix
## y - (numeric[]) response variable
## include - (integer[]) regressors to force in
## exclude - (integer[]) regressors to force out
## size - (integer[]) subset sizes
## penalty - (numeric) AIC penalty
## tolerance - (numeric[]) tolerance
## pradius - (integer) preordering radius
## nbest - (integer) number of best subsets
## ... - ignored
## .algo - (character) for interal use
##
## Rval: (mcsSubset)
## call - (call)
## nobs - (integer)
## nvar - (integer)
## include - (integer[])
## exclude - (integer[])
## size - (integer[])
## intercept - (logical)
## penalty - ("AIC"|"BIC"|numeric)
## tolerance - (numeric[])
## nbest - (integer)
## rss - (array)
## aic - (array)
## which - (array)
## .nodes - (integer)
##
## NOTE: '.algo'
## The following values are recognized:
## 'dca' : Dropping Column Algorithm (no branch and bound)
## 'bba' : Branch and Bound Algorithm
## 'pbba' : BBA with variable preordering
## 'hbba' : PBBA with tolerances
## 'xbba*': experimental PBBA
##
mcsSubset.default <- function (object, y, include = NULL, exclude = NULL,
size = NULL, penalty = 0, tolerance = 0,
pradius = NULL, nbest = 1, ..., .algo = "hbba")
{
## keep call (of generic)
call <- match.call()
call[[1]] <- as.name("mcsSubset")
## model matrix
x <- as.matrix(object); object <- NULL
## dims
nobs <- NROW(x)
nvar <- NCOL(x)
## variables names
x.names <- colnames(x)
## include
if (is.null(include)) {
include <- numeric(0)
} else {
## numeric --> numeric
if (is.numeric(include)) {
include <- match(include, 1:nvar)
}
## character --> numeric
if (is.character(include)) {
include <- match(include, x.names)
}
## logical --> numeric
if (is.logical(include)) {
include <- which(rep(include, length.out = nvar))
}
## canonicalize include
if (any(is.na(include))) {
include <- na.omit(include)
warning ("non-existing columns selected in 'include'; fixing 'include': ",
"c(", paste(include, collapse = ","), ")")
}
include <- sort(unique(include))
}
## exclude
if (is.null(exclude)) {
exclude <- numeric(0)
} else {
## numeric --> numeric
if (is.numeric(exclude)) {
exclude <- match(exclude, 1:nvar)
}
## character --> numeric
if (is.character(exclude)) {
exclude <- match(exclude, x.names)
}
## logical --> numeric
if (is.logical(exclude)) {
exclude <- which(rep(exclude, length.out = nvar))
}
## canonicalize exclude
if (any(is.na(include))) {
exclude <- na.omit(exclude)
warning ("invalid columns selected in 'excluded'; fixing 'exclude': ",
"c(", paste(exclude, collapse = ","), ")")
}
exclude <- sort(unique(exclude))
}
## include/exclude non-overlapping
if (any(intersect(include, exclude))) {
exclude <- setdiff(exclude, include)
warning ("'include' and 'exclude' overlap; fixing 'exclude': ",
"c(", paste(exclude, collapse = ","), ")")
}
## intercept
intercept <- isTRUE(all.equal(as.vector(x[, 1]), rep(1, nobs)))
if (intercept) {
if (any(include == 1)) {
## OK, already selected
} else if (any(exclude == 1)) {
## not selected
intercept <- FALSE
} else {
## select
include <- c(1, include)
}
}
## which
which <- c(include, setdiff(1:nvar, include))
which <- setdiff(which, exclude)
## size
size.min <- length(include) + 1
size.max <- length(which)
if (is.null(size)) {
size <- size.min:size.max
} else {
size <- match(size, size.min:size.max)
size <- (size.min:size.max)[size]
if (any(is.na(size))) {
size <- na.omit(size)
warning ("invalid sizes selected in 'size'; fixing 'size': ",
"c(", paste(size, collapse = ","), ")")
}
}
## preordering radius
## TODO: validate
if (is.null(pradius)) {
pmin <- 8
} else {
pmin <- size.max - pradius
}
## penalty
if (tolower(penalty) == "aic") {
penalty <- 2
attr(penalty, "label") <- "AIC"
} else if (tolower(penalty) == "bic") {
penalty <- log(nobs)
attr(penalty, "label") <- "BIC"
} else if (!is.numeric(penalty)) {
stop ("invalid 'penalty'")
}
## tolerance
## TODO: validate
if (penalty == 0) {
tolerance <- rep(tolerance, length.out = nvar)
tolerance[-size] <- +Inf
} else {
tolerance <- tolerance[1]
}
## call C
C_mark <- size.min - 1
C_size <- size.max
C_v <- which - 1
C_xy <- cbind(x[, which], y)
C_tau <- tolerance + 1
if (penalty == 0) {
C_index <- rep(0, nvar * nbest)
C_rss <- rep(0, nvar * nbest)
C_aic <- double(0)
C_which <- rep(0, nvar * nbest * nvar)
C_tau <- C_tau[size]
C_tau[C_tau == +Inf] <- .Machine$double.xmax
C_tau <- c(0, C_tau)
} else {
C_index <- rep(0, nbest)
C_rss <- rep(0, nbest)
C_aic <- rep(0, nbest)
C_which <- rep(0, nbest * nvar)
}
C_args <- list(
algo = as.character(.algo),
nobs = as.integer(nobs),
nvar = as.integer(nvar),
size = as.integer(C_size),
mark = as.integer(C_mark),
nbest = as.integer(nbest),
pmin = as.integer(pmin),
v = as.integer(C_v),
xy = as.double(C_xy),
rss = as.double(C_rss),
aic = as.double(C_aic),
which = as.logical(C_which),
penalty = as.double(penalty),
tau = as.double(C_tau),
nodes = integer(1),
info = integer(1)
)
C_rval <- do.call(".C", c("R_mcsSubset", C_args))
## return value
rval <- list(call = call,
nobs = nobs,
nvar = nvar,
include = include,
exclude = exclude,
size = NULL,
intercept = intercept,
penalty = penalty,
tolerance = NULL,
nbest = nbest,
rss = NULL,
aic = NULL,
which = NULL,
.nodes = C_rval$nodes)
class(rval) <- "mcsSubset"
## extract value & subsets
if (penalty == 0) {
.cnames <- paste(1:nbest, ".", sep = "")
## size
rval$size <- size
## tolerance
rval$tolerance <- array(tolerance, dim = nvar,
dimnames = list(1:nvar))
## rss
rval$rss <- array(C_rval$rss, dim = c(nbest, nvar),
dimnames = list(.cnames, 1:nvar))
rval$rss[, -rval$size] <- NA
rval$rss[rval$rss == .Machine$double.xmax] <- NA
## which
rval$which <- array(C_rval$which, dim = c(nvar, nbest, nvar),
dimnames = list(x.names, .cnames, 1:nvar))
rval$which[, , -rval$size] <- NA
} else {
.cnames <- paste(1:nbest, ".", sep = "")
## tolerance
rval$tolerance <- tolerance
## rss
rval$rss <- array(C_rval$rss, dim = nbest, dimnames = list(.cnames))
## aic
rval$aic <- array(C_rval$aic, dim = nbest, dimnames = list(.cnames))
## which
rval$which <- array(C_rval$which, dim = c(nvar, nbest),
dimnames = list(x.names, .cnames))
## size
rval$size <- array(apply(rval$which, 2, sum), dim = nbest,
dimnames = list(.cnames))
}
## done
rval
}
## print method for 'mcsSubset' objects
##
## Args:
## x - (mcsSubset)
## digits - (integer)
## ... - ignored
##
## Rval: (mcsSubset) invisible
##
print.mcsSubset <- function (x, digits = NULL, ...)
{
catln <- function (...) base::cat(..., "\n", sep = "")
if (is.null(digits)) {
digits <- max(3, getOption("digits") - 3)
}
catln()
catln("Call:")
catln(deparse(x$call, width.cutoff = floor(getOption("width") * 0.85)))
val <- as.matrix(c(format(x$nvar),
if (x$intercept) "Yes" else "No",
paste(x$include, collapse = " "),
paste(x$exclude, collapse = " "),
paste(x$size, collapse = " "),
if (x$penalty == 0) {
"RSS"
} else if (is.null(lab <- attr(x$penalty, "label"))) {
paste("AIC (k = ", format(x$penalty, digits = digits),
")", sep = "")
} else {
lab
},
paste(format(x$tolerance, digits = digits, trim = TRUE),
collapse = " "),
format(x$nbest)),
ncol = 1)
colnames(val) <- ""
rownames(val) <- c("Total regressors:", "Intercept:", "Include:", "Exclude:",
"Size:", "Criterion:", "Tolerance:", "N best:")
print(val, quote = FALSE)
invisible(x)
}
## plot method for 'mcsSubset' objects
##
## Args:
## x - (mcsSubset)
## type - (character)
## main - (character)
## xlab - (character)
## ylab - (character)
## col - (integer[]|character[])
## lty - (integer)
## ... - passed to 'deviance.mcsSubset' or 'AIC.mcsSubset'
## legend - (logical)
##
## Rval: (mcsSubset) invisible
##
## Graphical arguments are passed to 'plot.default'.
##
plot.mcsSubset <- function (x, type = "b", main = "Deviance",
xlab = NULL, ylab = "", col = "blue",
lty = 1, ..., legend = TRUE) {
if (x$penalty == 0) {
## rss
rss <- deviance(x, ...)
## sub title
sub <- "RSS"
## xlab
if (is.null(xlab)) {
xlab <- "Number of regressors in model"
}
## plot
plot(names(rss), rss, type = type, main = main, sub = sub,
xlab = xlab, ylab = ylab, col = col, lty = lty)
## legend
if (legend) {
legend("topright", "RSS", col = col, lty = lty, bty = "n")
}
} else {
digits <- max(3, getOption("digits") - 3)
## best
best <- 1:x$nbest
## aic
aic <- AIC(x, best = best, ...)
if (length(aic) > 1) {
aic <- aic$AIC
}
## sub title
sub <- attr(x$penalty, "label")
if (is.null(sub)) {
sub <- paste("AIC (k = ", format(x$penalty, digits = digits), ")", sep = "")
}
## xlab
if (is.null(xlab)) {
xlab <- "Best"
}
## plot
plot(best, aic, type = type, main = main, sub = sub,
xlab = xlab, ylab = ylab, col = col, lty = lty)
## labels (subset size)
text(best, aic, labels = paste("(size=", x$size, ")", sep = ""),
adj = c(0, 1.5), cex = 0.8)
## legend
if (legend) {
legend("topleft", sub, lty = lty, col = col, bty = "n")
}
}
## done
invisible(x)
}
#############
## METHODS ##
#############
## extract variable names
##
## Args:
## object - (mcsSubset)
## size - (integer)
## best - (integer)
## ... - ignored
## .full - (logical) for internal use
## .neg - (logical) for internal use
##
## Rval: (character[])
## variable names
##
variable.names.mcsSubset <- function (object, size = NULL, best = 1, ...,
.full = FALSE, .neg = FALSE) {
if (is.null(object$.lm)) {
stop ("'mcsSubset' object does not have a 'lm' component")
}
## full model
x.names <- variable.names(object$.lm)
if (.full) {
return (x.names)
}
## which
if (object$penalty == 0) {
wi <- object$which[, best, size]
} else {
wi <- object$which[, best]
}
## negate
wi <- xor(wi, .neg)
## select
x.names <- x.names[wi]
## done
x.names
}
## extract formula
##
## Args:
## x - (mcsSubset)
## ... - forwarded to 'variable.names.mcsSubset'
##
## Rval: (formula)
##
formula.mcsSubset <- function (x, ...) {
if (is.null(x$.lm)) {
stop ("'mcsSubset' object does not have a 'lm' component")
}
## full model
f <- formula(x$.lm)
## variable names
x.names <- variable.names(x, ..., .neg = TRUE)
## update formula
f <- update(f, paste(".~.", paste(c("", x.names), collapse = "-")))
## done
f
}
## extract model frame
##
## Args:
## formula - (mcsSubset)
## ... - forwarded to 'formula.mcsSubset'
##
## Rval: (data.frame)
##
model.frame.mcsSubset <- function (formula, ...) {
if (is.null(formula$.lm)) {
stop ("'mcsSubset' object does not have a 'lm' component")
}
## full model
mf <- model.frame(formula$.lm)
## formula
f <- formula(formula, ...)
## build
mf <- model.frame(f, data = mf)
## done
mf
}
## extract model matrix
##
## Args:
## object - (mcsSubset)
## size - (integer)
## best - (integer)
## ... - ignored
##
## Rval: (matrix)
##
model.matrix.mcsSubset <- function (object, size = NULL, best = 1, ...) {
if (is.null(object$.lm)) {
stop ("'mcsSubset' object does not have a 'lm' component")
}
## full model
x <- model.matrix(object$.lm)
## which
if (object$penalty == 0) {
wi <- object$which[, best, size]
} else {
wi <- object$which[, best]
}
## select
x <- x[, wi]
## done
x
}
## refit
##
## Args:
## object - (mcsSubset)
## ... - forwarded to 'formula.mcsSubset'
##
## Rval: (lm)
##
refit.mcsSubset <- function (object, ...) {
if (is.null(object$.lm)) {
stop ("'mcsSubset' object does not have a 'lm' component")
}
## extract formula
f <- formula(object, ...)
## model frame call
mf.call <- match.call(expand.dots = TRUE)
mf.call[[1]] <- as.name("model.frame")
mf.call$formula <- mf.call$object; mf.call$object <- NULL
## lm call
lm.call <- object$.lm$call
lm.call$formula <- f
lm.call$data <- mf.call
## eval
lm <- eval(lm.call, parent.frame())
## done
lm
}
## extract ceofficients
##
## Args:
## object - (mcsSubset)
## ... - forwarded to 'refit.mcsSubset'
##
## Rval: (numeric[])
##
## Note: 'refit'
## This method refits the 'mcsSubset' object and calls 'coef' on the
## obtained 'lm' object. In some circumstances it might be more
## efficient to call 'refit' directly and execute 'coef' on the
## obtained 'lm' object.
##
coef.mcsSubset <- function (object, ...) {
coef(refit(object, ...))
}
## extract variance-covariance matrix
##
## Args:
## object - (mcsSubset)
## ... - forwarded to 'refit.mcsSubset'
##
## Rval: (matrix)
##
## Note: 'refit'
## See 'coef.mcsSubset'.
##
vcov.mcsSubset <- function (object, ...) {
vcov(refit(object, ...))
}
## extract fitted values
##
## Args:
## object - (mcsSubset)
## ... - forwarded to 'refit.mcsSubset'
##
## Rval: (numeric[])
##
## Note: 'refit'
## See 'coef.mcsSubset'.
##
fitted.mcsSubset <- function (object, ...) {
fitted(refit(object, ...))
}
## extract residuals
##
## Args:
## object - (mcsSubset)
## ... - forwarded to 'refit.mcsSubset'
##
## Rval: (numeric[])
##
## Note: 'refit'
## See 'coef.mcsSubset'.
##
residuals.mcsSubset <- function (object, ...) {
residuals(refit(object, ...))
}
## extract deviance
##
## Args:
## object - (mcsSubset)
## size - (integer[])
## best - (integer[])
## ... - ignored
##
## Rval: (numeric[])
##
## Returns the RSS for the specified subset(s).
##
deviance.mcsSubset <- function (object, size = NULL, best = 1, ...) {
## compute indices
if (object$penalty == 0) {
if (is.null(size)) size <- object$size
d <- object$rss[best, size]
names(d) <- size
} else {
d <- object$rss[best]
}
## done
d
}
## extract log-likelihood (base method)
##
## Args:
## object - (mcsSubset)
## size - (integer[])
## best - (integer[])
## ... - ignored
## df - (integer[]) degrees of freedom
##
## Rval: (logLik)
##
## Note:
## Can handle multiple objects.
##
logLik.mcsSubset <- function (object, size = NULL, best = 1, ..., df) {
## degrees of freedom
if (object$penalty == 0) {
if (is.null(size)) size <- object$size
df <- rep(size + 1, each = length(best))
} else {
df <- object$size[best] + 1
}
## weights
if (is.null(w <- weights(object$.lm))) {
n <- object$nobs
s <- 0
} else {
wnz <- w[w != 0]
n <- sum(wnz)
s <- sum(log(wnz))
}
## extract rss
rss <- deviance(object, size = size, best = best)
## done
structure(0.5 * (s - n * (log(2 * pi) + 1 - log(n) + log(rss))),
df = df, nobs = n, class = "logLik")
}
## compute AIC
##
## Args:
## object - (mcsSubset)
## size - (integer[])
## best - (integer[])
## ... - ignored
## k - ("AIC"|"BIC"|numeric)
##
## Rval: (numeric|data.frame)
##
AIC.mcsSubset <- function (object, size = NULL, best = 1, ..., k = NULL) {
if (object$penalty == 0) {
if (is.null(size)) size <- object$size
if (is.null(k)) k <- 2
} else {
if (is.null(k)) k <- object$penalty
}
## extract log-likelihoods
ll <- logLik(object, size = size, best = best)
## penalty
if (tolower(k) == "aic") {
penalty <- 2; attr(penalty, "label") <- "AIC"
} else if (tolower(k) == "bic") {
n <- attr(ll, "nobs")
penalty <- log(n); attr(penalty, "label") <- "BIC"
} else {
penalty <- 2
}
## compute AICs
aic <- AIC(ll, k = penalty)
attr(aic, "penalty") <- penalty; attr(aic, "label") <- NULL
## data frame?
if (length(aic) > 1) {
aic <- data.frame(df = attr(ll, "df"), AIC = aic)
}
## done
aic
}
## compute BIC
##
## Args:
## object - (mcsSubset)
## size - (integer[])
## best - (integer[])
## ... - ignored
##
## Rval: (numeric|data.frame)
##
BIC.mcsSubset <- function (object, size = NULL, best = 1, ...) {
aic <- AIC(object, size = size, best = best, k = "BIC")
if (length(aic) > 1) {
aic$BIC <- aic$AIC; aic$AIC <- NULL
}
aic
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.