Nothing
# functions to aid in detecting linear dependent columns in the (transformed)
# model matrix or estimated plm models:
# * detect.lindep
# * alias (the latter is a wrapper around alias.lm)
# * detect_lin_dep for backward compatibility
#
# doc file provides an extensive example how linear dependence can arise after
# the data transformation, e. g., for within transformation
### detect.lindep.matrix, .data.frame, .plm
#' Functions to detect linear dependence
#'
#' Little helper functions to aid users to detect linear dependent columns in a
#' two-dimensional data structure, especially in a (transformed) model matrix -
#' typically useful in interactive mode during model building phase.
#'
#'
#' Linear dependence of columns/variables is (usually) readily avoided when
#' building one's model. However, linear dependence is sometimes not obvious
#' and harder to detect for less experienced applied statisticians. The so
#' called "dummy variable trap" is a common and probably the best--known
#' fallacy of this kind (see e. g. Wooldridge (2016), sec. 7-2.). When building
#' linear models with `lm` or `plm`'s `pooling` model, linear
#' dependence in one's model is easily detected, at times post hoc.
#'
#' However, linear dependence might also occur after some transformations of
#' the data, albeit it is not present in the untransformed data. The within
#' transformation (also called fixed effect transformation) used in the
#' `"within"` model can result in such linear dependence and this is
#' harder to come to mind when building a model. See **Examples** for two
#' examples of linear dependent columns after the within transformation: ex. 1)
#' the transformed variables have the opposite sign of one another; ex. 2) the
#' transformed variables are identical.
#'
#' During `plm`'s model estimation, linear dependent columns and their
#' corresponding coefficients in the resulting object are silently dropped,
#' while the corresponding model frame and model matrix still contain the
#' affected columns. The plm object contains an element `aliased` which
#' indicates any such aliased coefficients by a named logical.
#'
#' Both functions, `detect.lindep` and `alias`, help to
#' detect linear dependence and accomplish almost the same:
#' `detect.lindep` is a stand alone implementation while
#' `alias` is a wrapper around
#' [stats::alias.lm()], extending the `alias`
#' generic to classes `"plm"` and `"pdata.frame"`.
#' `alias` hinges on the availability of the package
#' \CRANpkg{MASS} on the system. Not all arguments of `alias.lm`
#' are supported. Output of `alias` is more informative as it
#' gives the linear combination of dependent columns (after data
#' transformations, i. e. after (quasi)-demeaning) while
#' `detect.lindep` only gives columns involved in the linear
#' dependence in a simple format (thus being more suited for automatic
#' post--processing of the information).
#'
#' @aliases detect.lindep
#' @param object for `detect.lindep`: an object which should be checked
#' for linear dependence (of class `"matrix"`, `"data.frame"`, or
#' `"plm"`); for `alias`: either an estimated model of class
#' `"plm"` or a `"pdata.frame"`. Usually, one wants to input a model
#' matrix here or check an already estimated plm model,
#' @param suppressPrint for `detect.lindep` only: logical indicating
#' whether a message shall be printed; defaults to printing the message, i. e.
#' to `suppressPrint = FALSE`,
#' @param model (see `plm`),
#' @param effect (see `plm`),
#' @param \dots further arguments.
#' @return For `detect.lindep`: A named numeric vector containing column
#' numbers of the linear dependent columns in the object after data
#' transformation, if any are present. `NULL` if no linear dependent
#' columns are detected.
#'
#' For `alias`: return value of [stats::alias.lm()] run on the
#' (quasi-)demeaned model, i. e. the information outputted applies to
#' the transformed model matrix, not the original data.
#' @note function `detect.lindep` was called `detect_lin_dep`
#' initially but renamed for naming consistency later with a
#' back-compatible solution.
#' @export
#' @author Kevin Tappe
#' @seealso [stats::alias()], [stats::model.matrix()] and especially
#' `plm`'s [model.matrix()] for (transformed) model matrices,
#' plm's [model.frame()].
#' @references
#'
#' \insertRef{WOOL:13}{plm}
#'
#' @keywords manip array
#' @examples
#'
#' ### Example 1 ###
#' # prepare the data
#' data("Cigar" , package = "plm")
#' Cigar[ , "fact1"] <- c(0,1)
#' Cigar[ , "fact2"] <- c(1,0)
#' Cigar.p <- pdata.frame(Cigar)
#'
#' # setup a formula and a model frame
#' form <- price ~ 0 + cpi + fact1 + fact2
#' mf <- model.frame(Cigar.p, form)
#' # no linear dependence in the pooling model's model matrix
#' # (with intercept in the formula, there would be linear depedence)
#' detect.lindep(model.matrix(mf, model = "pooling"))
#' # linear dependence present in the FE transformed model matrix
#' modmat_FE <- model.matrix(mf, model = "within")
#' detect.lindep(modmat_FE)
#' mod_FE <- plm(form, data = Cigar.p, model = "within")
#' detect.lindep(mod_FE)
#' alias(mod_FE) # => fact1 == -1*fact2
#' plm(form, data = mf, model = "within")$aliased # "fact2" indicated as aliased
#'
#' # look at the data: after FE transformation fact1 == -1*fact2
#' head(modmat_FE)
#' all.equal(modmat_FE[ , "fact1"], -1*modmat_FE[ , "fact2"])
#'
#' ### Example 2 ###
#' # Setup the data:
#' # Assume CEOs stay with the firms of the Grunfeld data
#' # for the firm's entire lifetime and assume some fictional
#' # data about CEO tenure and age in year 1935 (first observation
#' # in the data set) to be at 1 to 10 years and 38 to 55 years, respectively.
#' # => CEO tenure and CEO age increase by same value (+1 year per year).
#' data("Grunfeld", package = "plm")
#' set.seed(42)
#' # add fictional data
#' Grunfeld$CEOtenure <- c(replicate(10, seq(from=s<-sample(1:10, 1), to=s+19, by=1)))
#' Grunfeld$CEOage <- c(replicate(10, seq(from=s<-sample(38:65, 1), to=s+19, by=1)))
#'
#' # look at the data
#' head(Grunfeld, 50)
#'
#' form <- inv ~ value + capital + CEOtenure + CEOage
#' mf <- model.frame(pdata.frame(Grunfeld), form)
#' # no linear dependent columns in original data/pooling model
#' modmat_pool <- model.matrix(mf, model="pooling")
#' detect.lindep(modmat_pool)
#' mod_pool <- plm(form, data = Grunfeld, model = "pooling")
#' alias(mod_pool)
#'
#' # CEOtenure and CEOage are linear dependent after FE transformation
#' # (demeaning per individual)
#' modmat_FE <- model.matrix(mf, model="within")
#' detect.lindep(modmat_FE)
#' mod_FE <- plm(form, data = Grunfeld, model = "within")
#' detect.lindep(mod_FE)
#' alias(mod_FE)
#'
#' # look at the transformed data: after FE transformation CEOtenure == 1*CEOage
#' head(modmat_FE, 50)
#' all.equal(modmat_FE[ , "CEOtenure"], modmat_FE[ , "CEOage"])
#'
detect.lindep <- function(object, ...) {
UseMethod("detect.lindep")
}
#' @rdname plm-deprecated
#' @export
detect_lin_dep <- function(object, ...) {
.Deprecated(old = "detect_lin_dep", new = "detect.lindep",
msg = "Function name 'detect_lin_dep' deprecated, please use 'detect.lindep'")
UseMethod("detect.lindep")
}
#' @rdname detect.lindep
#' @method detect.lindep matrix
#' @export
detect.lindep.matrix <- function(object, suppressPrint = FALSE, ...) {
if (!inherits(object, "matrix")) {
stop("Input 'object' must be a matrix. Presumably, one wants a model matrix
generated by some 'model.matrix' function.")}
# do rank reduction to detect lin. dep. columns
rank_rec <- sapply(1:ncol(object), function(col) qr(object[ , -col])$rank)
if (diff(range(rank_rec)) == 0) {
num <- NULL # return NULL if there is no linear dep.
} else {
num <- which(rank_rec == max(rank_rec))
names(num) <- colnames(object)[num]
}
if(!suppressPrint) {
if(is.null(num)) {
print("No linear dependent column(s) detected.")
} else {
print(paste0("Suspicious column number(s): ", paste(num, collapse = ", ")))
print(paste0("Suspicious column name(s): ", paste(names(num), collapse = ", ")))
}
return(invisible(num))
}
return(num)
}
#' @rdname detect.lindep
#' @method detect.lindep data.frame
#' @export
detect.lindep.data.frame <- function(object, suppressPrint = FALSE, ...) {
if (!inherits(object, "data.frame")) {
stop("Input 'object' must be a data.frame")}
return(detect.lindep.matrix(as.matrix(object), suppressPrint = suppressPrint, ...))
}
#' @rdname detect.lindep
#' @method detect.lindep plm
#' @export
detect.lindep.plm <- function(object, suppressPrint = FALSE, ...) {
if (!inherits(object, "plm")) {
stop("Input 'object' must be of class \"plm\"")}
return(detect.lindep.matrix(model.matrix(object), suppressPrint = suppressPrint, ...))
}
### alias.plm, alias.pFormula
# This is just a wrapper function to allow to apply the generic stats::alias on
# plm objects and pFormulas with the _transformed data_ (the transformed model.matrix).
# NB: arguments 'model' and 'effect' are not treated here.
#' @rdname detect.lindep
#' @export
alias.plm <- function(object, ...) {
dots <- list(...)
if (!is.null(dots$inst.method)) stop("alias.plm/alias.pFormula: IV not supported")
if (length(formula(object))[2] == 2) stop("alias.plm/alias.pFormula: IV not supported")
# catch unsupported alias.lm args and convert
if (!is.null(dots[["partial"]])) {
if (dots[["partial"]]) {
dots[["partial"]] <- FALSE
warning("alias.plm/alias.pFormula: arg partial = TRUE not supported, changed to FALSE")
}
}
if (!is.null(dots[["partial.pattern"]])) {
if (dots[["partial.pattern"]]) {
dots[["partial.pattern"]] <- FALSE
warning("alias.plm/alias.pFormula: arg partial.pattern = TRUE not supported, changed to FALSE")
}
}
X <- model.matrix(object)
y <- pmodel.response(object)
lm.fit.obj <- lm.fit(X, y)
class(lm.fit.obj) <- "lm"
lm.fit.obj$terms <- deparse(object$formula)
## use lm.fit rather than lm():
## could estimate lm model with lm(), but takes more resources and
## need to remove extra classes "formula" for lm to prevent warning
# form <- object$formula
# form <- setdiff(class(form), c("pFormula", "Formula"))
# Xdf <- as.data.frame(X)
# ydf <- as.data.frame(y)
# names(ydf) <- names(object$model)[1]
# data <- cbind(ydf, Xdf)
# lmobj <- lm(form, data = data)
# return(stats::alias(lmobj))
return(stats::alias(lm.fit.obj, ... = dots))
}
## alias.pFormula <- function(object, data,
## model = c("pooling", "within", "Between", "between",
## "mean", "random", "fd"),
## effect = c("individual", "time", "twoways"),
## ...) {
## dots <- list(...)
## if (!is.null(dots$inst.method)) stop("alias.plm/alias.pFormula: IV not supported")
## model <- match.arg(model)
## effect <- match.arg(effect)
## formula <- object
## # check if object is already pFormula, try to convert if not
## if (!inherits(formula, "pFormula")) formula <- pFormula(formula)
## # check if data is already a model frame, convert to if not
## if (is.null(attr(data, "terms"))) {
## data <- model.frame.pFormula(pFormula(formula), data)
## }
## plmobj <- plm(formula, data = data, model = model, effect = effect, ...)
## # print(summary(plmobj))
## return(alias(plmobj, ...))
## }
#' @rdname detect.lindep
#' @export
alias.pdata.frame <- function(object,
model = c("pooling", "within", "Between", "between",
"mean", "random", "fd"),
effect = c("individual", "time", "twoways"),
...) {
dots <- list(...)
if (!is.null(dots$inst.method)) stop("alias.plm/alias.pFormula: IV not supported")
model <- match.arg(model)
effect <- match.arg(effect)
# check if data is already a model frame, if not exit
if (is.null(attr(object, "terms")))
stop("the argument must be a model.frame")
formula <- attr(object, "formula")
plmobj <- plm(formula, data = object, model = model, effect = effect, ...)
return(alias(plmobj, ...))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.