Nothing
# file complexlm/R/zlm.wfit.R
# copyright (C) 2022 W. L. Ryan
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 or 3 of the License
# (at your option).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
#
####
### A function that somewhat replicates model.matrix(), but accepts complex valued data. It will probably be slower and less efficient, but mostly functional.
### It cannot handle algebraic expressions in formula.
### terms is the output of terms(formula)
####
#' Generate a Model Matrix (Design Matrix) Using Complex Variables
#'
#' A function that somewhat replicates model.matrix(), but accepts complex valued data. It will probably be slower and less efficient, but mostly functional.
#' It cannot handle algebraic expressions in formula.
#'
#' @param trms A "terms" object as generated by [stats::terms()].
#' @param data A data frame containing the data referenced by the symbolic formula / model in `trms`.
#' @param contrasts.arg a list, default is NULL. Not currently supported. See [stats::model.matrix()] for details.
#'
#'
#' @return A model matrix, AKA a design matrix for a regression, containing the relevant information from `data`.
#' @export
#'
#' @seealso [stats::model.matrix], [complexlm::lm]
#'
#' @examples
#' set.seed(4242)
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' tframe <- expand.grid(-3:3,-3:3)
#' Xt <- complex(real = tframe[[1]], imaginary = tframe[[2]])
#' tframe <- data.frame(Xt=Xt, Yt= Xt * slop + interc + complex(real=rnorm(length(Xt)),
#' imaginary=rnorm(length(Xt))))
#' testterms <- terms(Yt ~ Xt)
#' zmodel.matrix(testterms, tframe)
zmodel.matrix <- function(trms, data, contrasts.arg = NULL)
{
respname <- as.character(attr(trms, "variables")[[attr(trms, "response") + 1]])
termlabs <- attr(trms, "term.labels")
interactions <- grep(":", attr(trms, "term.labels"), value = TRUE)
if (length(interactions) == 0) prednames <- termlabs
else prednames <- termlabs[termlabs != interactions]
modelframe <- data[prednames]
if (length(interactions) != 0) {
for (inter in interactions) {
intersplit <- strsplit(inter, ":")[[1]]
modelframe[, inter] <- data[, intersplit[1]] * data[, intersplit[2]]
}
}
if (attr(trms, "intercept") == 1) modelmatrix <- as.matrix(data.frame("(intercept)" = rep(1,length(modelframe[,1])), modelframe))
else modelmatrix <- as.matrix(modelframe)
if (attr(trms, "intercept") == 1) attr(modelmatrix, "assign") <- 0:length(termlabs)
else attr(modelmatrix, "assign") <- 1:length(termlabs)
if (length(interactions) != 0) {
attr(modelmatrix, "dimnames") <- list(as.character(1:length(modelframe[,1])), c("(intercept)", prednames, interactions)) # Fix the dimnames to match those on standard model matrices.
}
else attr(modelmatrix, "dimnames") <- list(as.character(1:length(modelframe[,1])), c("(intercept)", prednames)) # Fix the dimnames to match those on standard model matrices.
return(modelmatrix)
}
#### This function will be used instead of .Call(C_Cdqlrs, x * wts, y * wts, tol, FALSE) if x and/or y are complex.
#' An alternative to `.Call(C_Cdqlrs, x * wts, y * wts, tol, FALSE))` that is compatible with complex variables
#'
#' This serves as a wrapper for qr, replicating the behavior and output of the C++ function `C_Cdqlrs`. It is used in `zlm.wfit`,
#' and is unlikely to be needed by end users.
#'
#' @param x a complex matrix (will also accept numeric, but in that case you might as well use `C_Cdqlrs`) whose QR decomposition is to be computed.
#' @param y a complex vector or matrix of right-hand side quantities.
#' @param tol the tolerance for detecting linear dependencies in the columns of x. Not used for complex `x`.
#' @param chk not used. Included to better imitate `C_Cdqlrs`.
#'
#' @return A list that includes the qr decomposition, its coeffcionts, residuals, effects, rank, pivot information, qraux vector,
#' tolerance, and whether or not it was pivoted. This is the same output as `C_Cdqlrs`.
#' @export
#'
#' @seealso [base::qr], [complexlm::lm], [complexlm::zlm.wfit], [complexlm::rlm]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slope <- complex(real = 4.23, imaginary = 2.323)
#' intercept <- complex(real = 1.4, imaginary = 1.804)
#' x <- complex(real = rnorm(n), imaginary = rnorm(n))
#' y <- slope * x + intercept
#' complexdqlrs(x, y, 10^-4, 1.2)
complexdqlrs <- function(x, y, tol = 1E-07, chk)
{
thisqr <- qr(x, tol = tol)
coefficients <- qr.coef(thisqr, y)
resids <- y - as.matrix(x) %*% coefficients
effects <- qr.qty(thisqr, y)
xrank <- thisqr$rank
pivot <- thisqr$pivot
qraux <- thisqr$qraux
pivoted <- any(pivot > (1:length(pivot)) + 1) * 1
ans = list(qr = thisqr$qr, coefficients = coefficients, residuals = resids, effects = effects, rank = xrank,
pivot = pivot, qraux = qraux, tol = tol, pivoted = pivoted)
return(ans)
}
####
### An adaptation of lm that is compatible with complex variables. If the response is not complex, it calls the standard stats::lm
### Note: It is not capable of dealing with contrasts in the complex case. May not understand offsets either. It also can't handle algebraic expressions in formula.
### model.frame needs to be changed to allow complex variables in order to enable these features.
####
#' Linear Model Fitting for Complex or Numeric Variables
#'
#' An adaptation of lm that is compatible with complex variables. If the response is not complex, it calls the standard [stats::lm()]
#' Note: It is not capable of dealing with contrasts in the complex case.
#' The formula interpretation is also unable of handle algebraic expressions in `formula`.
#'
#' @inherit stats::lm params
#'
#' @param formula an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted.
#' For complex variables there are restrictions on what kinds of formula can be comprehended. See [complexlm::zmodel.matrix] for details.
#' @param x logical. If `TRUE` return the model matrix of the fit. Default is `TRUE`.
#' @param contrasts Not implemented for complex variables. See [complexlm::zmodel.matrix] for details. Default is `NULL`
#'
#' @details Like [stats::lm] the models are specified symbolically using the standard R formula notation:
#' `response ~ terms`
#' Meaning `response` is a linear combination of the predictor variables in `terms`.
#' For compatibility with complex numbers `complexlm::lm` uses [zmodel.matrix] to build the model matrix
#' from the model formula. As such it is limited to terms consisting of predictor names connected by `+` and/or `:` operators.
#' Anything more complicated will result in an error.
#'
#' If response is a matrix, then then `lm()` fits a model to each column, and returns an object with class "mlm".
#'
#' For complex input, this function calls [zlm.wfit].
#'
#' @note In `complexlm`, the `x` parameter defaults to `TRUE` so that followup
#' methods such as [predict.lm] have access to the model matrix.
#'
#' @return
#' Returns an object of class `c("zlm", "lm")`, or for multiple responses `c("zlm", "mlm", "lm")`.
#'
#' The functions [summary] and [anova] are used to obtain and print a summary and analysis of variance table of the results.
#' The generic functions [coefficients], [effects], [fitted.values] and [residuals] extract various useful features of the value returned by lm.
#' Of course these things can also be accessed by simply using the get element symbol `$`.
#'
#' Objects of class "zlm" are lists with the following components.
#' \item{`coefficients`}{a named vector of coefficients.}
#' \item{`residuals`}{the residuals, that is response minus fitted values.}
#' \item{`fitted.values`}{The fitted values, which are response values obtained by feeding the predictors into the model.}
#' \item{`rank`}{The numeric rank of the fitted linear model.}
#' \item{`weights`}{Numeric. The user supplied weights for the linear fit. If none were given, a vector of `1`s of length equal to that of the input data.}
#' \item{`df.residual`}{The residual degrees of freedom.}
#' \item{`call`}{The matched call.}
#' \item{`terms`}{the [terms] object used.}
#' \item{`contrasts`}{The contrasts used, as these are not supported this component will probably be `NULL`.}
#' \item{`xlevels`}{(only where relevant) a record of the levels of the factors used in fitting.}
#' \item{`offset`}{the offset used (missing if none were used).}
#' \item{`y`}{if requested, the response used.}
#' \item{`x`}{the model matrix used, unless requested to not return it.}
#' \item{`model`}{if requested, the model frame used.}
#' \item{`na.action`}{(where relevant) information returned by [model.frame] on the special handling of NAs.}
#' \item{`assign`}{Used by extractor functions like [summary] and [effects] to understand variable names. Not included in null fits.}
#' \item{`effects`}{Complex list. See [effects] for explanation. Not included in null fits.}
#' \item{`qr`}{unless declined, the output of the [qr] object created in the process of fitting. Not included in null fits.}
#' @export
#'
#' @seealso [complexlm::lm.fit], [complexlm::lm.wfit], [zlm.wfit], [zmodel.matrix], [complexdqlrs], [stats::lm]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x= xx, y= slop*xx + interc + e)
#' lm(y ~ x, data = tframe, weights = rep(1,n))
lm <- function(formula, data, subset, weights, na.action,
method = "qr", model = TRUE, x = TRUE, y = FALSE,
qr = TRUE, singular.ok = TRUE, contrasts = NULL,
offset, ...)
{
cl <- match.call()
trms <- terms(x = formula, data = data)
respname <- as.character(attr(trms, "variables")[[attr(trms, "response") + 1]])
if (is.complex(data[[1,respname]]) == FALSE) # Now compatible with tibble input
{
cl[[1]] <- stats::lm
eval(cl, parent.frame())
}
else
{
ret.x <- x
ret.y <- y
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
## need stats:: for non-standard evaluation
mf[[1L]] <- quote(stats::model.frame) # It works with complex numbers :)
mf <- eval(mf, parent.frame())
if (method == "model.frame")
return(mf)
else if (method != "qr")
warning(gettextf("method = '%s' is not supported. Using 'qr'", method),
domain = NA)
mt <- attr(mf, "terms") # allow model.frame to update it
y <- model.response(mf) # Huh, this does work with complex numbers.
## avoid any problems with 1D or nx1 arrays by as.vector.
w <- as.vector(model.weights(mf))
if(!is.null(w) && !is.numeric(w))
stop("'weights' must be a numeric vector")
offset <- model.offset(mf) # Uncertain if this works with complex variables. It appears to...
mlm <- is.matrix(y)
ny <- if(mlm) nrow(y) else length(y)
if(!is.null(offset)) {
if(!mlm) offset <- as.vector(offset)
if(NROW(offset) != ny)
stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
NROW(offset), ny), domain = NA)
}
if (is.empty.model(mt)) {
x <- NULL
z <- list(coefficients = if(mlm) matrix(NA_real_, 0, ncol(y))
else numeric(),
residuals = y,
fitted.values = 0 * y, weights = w, rank = 0L,
df.residual = if(!is.null(w)) sum(w != 0) else ny)
if(!is.null(offset)) {
z$fitted.values <- offset
z$residuals <- y - offset
}
}
else {
if (is.null(contrasts) == FALSE) warning("Contrasts are not supported for complex fits.")
x <- zmodel.matrix(mt, mf)
z <- if(is.null(w)) lm.fit(x, y, offset = offset,
singular.ok=singular.ok, ...)
else lm.wfit(x, y, w, offset = offset, singular.ok=singular.ok, ...)
}
class(z) <- c("zlm", if(mlm) "mlm", "lm")
z$na.action <- attr(mf, "na.action")
z$offset <- offset
z$contrasts <- attr(x, "contrasts")
z$xlevels <- .getXlevels(mt, mf)
z$call <- cl
z$terms <- mt
if (model)
z$model <- mf
if (ret.x)
z$x <- x
if (ret.y)
z$y <- y
if (!qr) z$qr <- NULL
return(z)
}
}
####
### Wrapper for lm.fit() If data is numeric, use lm.fit() from stats. If it is complex, use zlm.wfit().
####
#' Complex Variable Compatible Wrappers for [stats::lm.fit()] and [stats::lm.wfit()]
#'
#' This function is just an if statement.
#' If the design matrix `x` is complex, [zlm.wfit()] is called.
#' Otherwise [stats::lm.fit()] or [stats::lm.wfit()] is called.
#' These functions are unlikely to be needed by end users, as they are called by [lm()].
#'
#' @inherit stats::lm.wfit params return
#'
#' @export
#'
#' @examples
#' set.seed(4242)
#' n <- 6
#' p <- 2
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' slop2 = complex(real = 2.1, imaginary = -3.9)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' desmat <- matrix(c(complex(real = rnorm(n * p), imaginary = rnorm(n * p)), rep(1, n)), n, p + 1)
#' y = desmat %*% c(slop, slop2, interc) + e
#' lm.fit(desmat, y)
lm.fit <- function(x, y, offset = NULL, method = "qr", tol = 1e-7,
singular.ok = TRUE, ...)
{
cll <- match.call()
if (is.complex(x)) cll[[1]] <- zlm.wfit
else cll[[1]] <- stats::lm.fit
eval(cll, parent.frame())
}
####
### Wrapper for lm.wfit(). If data is numeric, use lm.wfit() from stats. If it is complex, use zlm.wfit().
####
#' Fitting function for complex linear models
#'
#' @describeIn lm.fit wrapper for weighted linear fitting function.
#'
#' @export
lm.wfit <- function(x, y, w, offset = NULL, method = "qr", tol = 1e-7,
singular.ok = TRUE, ...)
{
cll <- match.call()
if (is.complex(x)) cll[[1]] <- zlm.wfit
else cll[[1]] <- stats::lm.wfit
eval(cll, parent.frame())
}
#' Least-Squares Linear Fitting for Complex Variables
#'
#' The function eventually called by [lm()], [lm.fit()], and/or [lm.wfit()] if fed complex data.
#' Performs ordinary (least-squares) linear fitting on complex variable data.
#' Like [stats::lm.wfit()], which it is based off of, it uses qr decomposition
#' for the matrix algebra. Unlike `stats::lm.wfit()` it also handles un-weighted
#' regression, by setting the weights to 1 by default.
#'
#' @param x a complex design matrix, `n` rows by `p` columns.
#' @param y a vector of observations/responses of length `n`, or a matrix with `n` rows.
#' @param w a vector of weights to be used in the fitting process. The sum of `w * r^2` is minimized, with `r` being the residuals. By default, `w` is a vector of length `n` with every element equal to 1, making this an unweighted fit.
#' @param offset optional. A complex vector of length n that will be subtracted from y prior to fitting.
#' @param method optional. a string that can be used to choose any method you would like. As long as it is "qr".
#' @param tol tolerance for the [qr] decomposition. Default is 1e-7.
#' @param singular.ok logical. If false, a singular model is an error.
#' @param ... currently disregarded.
#'
#' @inherit stats::lm.wfit return
#' @export zlm.wfit
#'
#' @inherit lm.wfit examples
zlm.wfit <- function (x, y, w = rep(1L, ifelse(is.vector(x), length(x), nrow(x))), offset = NULL, method = "qr", tol = 1e-07,
singular.ok = TRUE, ...)
{
#print(paste("length of weights is ", length(w))) # Used for debugging.
if (is.null(n <- nrow(x)))
stop("'x' must be a matrix")
if (n == 0)
stop("0 (non-NA) cases")
ny <- NCOL(y)
if (is.matrix(y) && ny == 1L)
y <- drop(y)
if (!is.null(offset))
y <- y - offset
if (NROW(y) != n | length(w) != n)
stop("incompatible dimensions")
if (any(w < 0 | is.na(w)))
stop("missing or negative weights not allowed")
if (method != "qr")
warning(gettextf("method = '%s' is not supported. Using 'qr'",
method), domain = NA)
chkDots(...)
x.asgn <- attr(x, "assign")
#print(w)
zero.weights <- any(w == 0) # Wait a minute... This could get in the way of redescending M-estimators!
if (zero.weights) {
save.r <- y
save.f <- y
save.w <- w
ok <- w != 0
nok <- !ok
w <- w[ok] ## Here we drop all the weights that equal zero!! Ok, but we should drop the corresponding x and y elements too.
x0 <- x[!ok, , drop = FALSE]
x <- x[ok, , drop = FALSE]
n <- nrow(x)
y0 <- if (ny > 1L)
y[!ok, , drop = FALSE]
else y[!ok]
y <- if (ny > 1L)
y[ok, , drop = FALSE]
else y[ok]
}
p <- ncol(x)
if (p == 0) {
return(list(coefficients = numeric(), residuals = y,
fitted.values = 0 * y, weights = w, rank = 0L, df.residual = length(y)))
}
if (n == 0) {
return(list(coefficients = rep(NA_real_, p), residuals = y,
fitted.values = 0 * y, weights = w, rank = 0L, df.residual = 0L))
}
wts <- sqrt(w)
#print(wts) # Used for debugging.
#print(typeof(wts)) # Used for debugging.
#print(typeof(x)) # Used for debugging.
#print(nrow(x)) # debugging. x is a matrix, of course.
#print(length(wts))
#print(x)
#print(wts)
z <- complexdqlrs(x * wts, y * wts, tol, FALSE)
if (!singular.ok && z$rank < p)
stop("singular fit encountered")
coef <- z$coefficients
#print(coef) # Used for debugging.
pivot <- z$pivot
r1 <- seq_len(z$rank)
dn <- colnames(x)
if (is.null(dn))
dn <- paste0("x", 1L:p)
nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank))
r2 <- if (z$rank < p)
(z$rank + 1L):p
else integer()
if (is.matrix(y)) {
coef[r2, ] <- NA
if (z$pivoted)
coef[pivot, ] <- coef
dimnames(coef) <- list(dn, colnames(y))
dimnames(z$effects) <- list(nmeffects, colnames(y))
}
else {
coef[r2] <- NA
if (z$pivoted)
coef[pivot] <- coef
names(coef) <- dn
names(z$effects) <- nmeffects
}
z$coefficients <- coef
z$residuals <- z$residuals/wts
z$fitted.values <- y - z$residuals
z$weights <- w
if (zero.weights) {
coef[is.na(coef)] <- 0
f0 <- x0 %*% coef
if (ny > 1) {
save.r[ok, ] <- z$residuals
save.r[nok, ] <- y0 - f0
save.f[ok, ] <- z$fitted.values
save.f[nok, ] <- f0
}
else {
save.r[ok] <- z$residuals
save.r[nok] <- y0 - f0
save.f[ok] <- z$fitted.values
save.f[nok] <- f0
}
z$residuals <- save.r
z$fitted.values <- save.f
z$weights <- save.w
}
if (!is.null(offset))
z$fitted.values <- z$fitted.values + offset
if (z$pivoted)
colnames(z$qr) <- colnames(x)[z$pivot]
qr <- z[c("qr", "qraux", "pivot", "tol", "rank")]
c(z[c("coefficients", "residuals", "fitted.values", "effects",
"weights", "rank")], list(assign = x.asgn, qr = structure(qr,
class = "qr"), df.residual = n - z$rank))
}
####
### A wrapper for summary.lm(). If the residuals are numeric, call summary.lm() from stats. Not anymore, I had to make a new 'zlm' class :( Easier and faster than I thought :)
#' Summarize Complex Linear Model Fits.
#'
#' Summary method for complex linear fits of class "zlm".
#' Based off of, and very similar to [stats::summary.lm]. However it does not delve into quantiles or 'significance stars', and includes the 'pseudo variance'.
#'
#' @param object An object of class "zlm". Presumably returned by [complexlm::lm]. May contain complex variables.
#' @param x An object of class "summary.zlm", usually generated by a call to `summary.zlm`.
#' @param digits the number of significant digits to include when printing.
#' @param correlation Logical. If TRUE, the correlation matrix of the estimated parameters is returned and printed.
#' @param symbolic.cor Logical. If TRUE, print the correlations in a symbolic form (see [stats::symnum]) rather than as numbers. (This may not work.)
#' @param ... Further arguments passed to or from other methods.
#'
#' @details See [stats::summary.lm] for more information.
#' In addition to the information returned by `stats::summary.lm`, this complex variable compatible version also returns
#' "pseudo standard error" or "relational standard error" which is the square root of the "pseudo-variance".
#' This is a complex number that quantifies the covariance between the real and imaginary parts. Can also be thought of as the amount and direction of anisotropy of the
#' (presumed complex normal) probability distribution of the residuals in the complex plane. The argument of this number gives the direction of the semi-major axis of an iso-probability-density ellipse
#' centered on the mean, while its modulus is the length of the semi-major axis. The variance, meanwhile, gives the area of this ellipse, divided by pi. Together they fully describe it.
#'
#' @return Returns an object of class "summary.zlm" and "summary.lm", that is a list containing the following elements.
#' \item{`residuals`}{Complex or numeric. The weighted residuals, that is the measured value minus the fitted value, scaled by the square root of the weights given in the call to lm.}
#' \item{`correlation`}{A complex matrix with real diagonal elements. The computed correlation coefficient matrix for the coefficients in the model.}
#' \item{`pseudocorrelation`}{A complex matrix. The computed pseudo-correlation coefficient matrix for the coefficients in the model.}
#' \item{`cov.unscaled`}{The unscaled covariance matrix; i.e, a complex matrix with real diagonal elements such that multiplying it by an estimate of the error variance produces an estimated covariance matrix for the coefficients.}
#' \item{`pcov.unscaled`}{The unscaled pseudo-covariance matrix; i.e, a complex matrix such that multiplying it by an estimate of the error pseudo-variance produces an estimated pseudo-covariance matrix for the coefficients.}
#' \item{`sigma`}{Numeric. The square root of the estimated variance of the random error.}
#' \item{`psigma`}{Complex. The square root of the estimated pseudo-variance of the random error. See details above.}
#' \item{`df`}{The number of degrees of freedom for the model and for residuals. A 3 element vector (p, n-p, p*), the first being the number of non-aliased coefficients, the last being the total number of coefficients.}
#' \item{`coefficients`}{A 5 column matrix that contains the model coefficients, their standard errors, their pseudo standard errors (see details above), their t statistics, and corresponding (two-sided) p-value. Aliased coefficients are omitted.}
#' \item{`aliased`}{Named logical vector showing if the original coefficients are aliased.}
#' \item{`terms`}{The terms object used in fitting this model.}
#' \item{`fstatistic`}{(for models including non-intercept terms) a 3 element numeric vector with the value of the F-statistic with its numerator and denominator degrees of freedom.}
#' \item{`r.squared`}{Numeric. The fraction of variance explained by the model.}
#' \item{`adj.r.squared`}{the above R^2 statistic "adjusted", penalizing for higher p.}
#' \item{`symbolic.cor`}{(only if `correlation` is true.) The value of the argument symbolic.cor.}
#' \item{`na.action`}{from `object`, if present there.}
#'
#' @export
#' @export summary.zlm
#'
#' @note `print.summary.zlm` calls `print.summary.rzlm`
#'
#' @seealso [lm], [rlm]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slop*xx + interc + e)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' summ <- summary(fit)
#' print(summ)
summary.zlm <- function(object, correlation = FALSE, symbolic.cor = FALSE, ...)
{
z <- object
p <- z$rank
rdf <- z$df.residual
if (p == 0) {
r <- z$residuals
n <- length(r)
w <- z$weights
if (is.null(w)) {
rss <- sum(as.numeric(Conj(r)*r))
prss <- sum(r^2) # 'pseudo' sum of squared residuals.
} else {
rss <- sum(w * as.numeric(Conj(r)*r))
prss <- sum(r^2) # 'pseudo' sum of squared residuals.
r <- sqrt(w) * r
}
resvar <- rss/rdf # Variance of residuals.
respvar <- prss / rdf # Pseudo-variance of residuals.
ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")]
class(ans) <- c("summary.zlm", "summary.lm")
ans$aliased <- is.na(coef(object)) # used in print method
ans$residuals <- r
ans$df <- c(0L, n, length(ans$aliased))
ans$coefficients <- matrix(NA_real_, 0L, 5L, dimnames =
list(NULL, c("Estimate", "Std. Error", "Pseudo Std. Error", "t value", "Pr(>|t|)")))
ans$sigma <- sqrt(resvar)
ans$psigma <- sqrt(respvar) # Pseudo standard deviation of residuals.
ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- matrix(NA_real_, 0L, 0L)
if (correlation) ans$correlation <- ans$cov.unscaled
return(ans)
}
if (is.null(z$terms))
stop("invalid 'lm' object: no 'terms' component")
if(!inherits(object, "lm"))
warning("calling summary.lm(<fake-lm-object>) ...")
#Qr <- stats:::qr.lm(object) # Internal function that just returns the thing in z$qr. Unless it's not there, in which case it gives an error.
try(Qr <- object$qr, stop("lm object does not have a proper 'qr' component. Rank zero or should not have used lm(.., qr=FALSE).")) # Replicates the behavior of the line above without calling a unexported function.
n <- NROW(Qr$qr)
if(is.na(z$df.residual) || n - p != z$df.residual)
warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
## do not want missing values substituted here
r <- z$residuals
f <- z$fitted.values
w <- z$weights
if (is.null(w)) {
mss <- if (attr(z$terms, "intercept"))
sum(as.numeric(Conj(f - mean(f))*(f - mean(f)))) else sum(as.numeric(Conj(f)*f)) # Sum of conjugate squared deviations from the mean of the fitted values.
pmss <- if (attr(z$terms, "intercept"))
sum((f - mean(f))^2) else sum(f^2) # Sum of squared deviations from the mean of the fitted values. A complex number.
rss <- sum(as.numeric(Conj(r)*r))
prss <- sum(r^2)
} else {
mss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f / sum(w))
sum(w * as.numeric(Conj(f - m)*(f - m)))
} else sum(w * as.numeric(Conj(f)*f))
pmss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f / sum(w))
sum(w * (f - m)^2)
} else sum(w * f^2)
rss <- sum(w * as.numeric(Conj(r)*r))
prss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf # Variance of the residuals.
respvar <- prss/rdf # Pseudo variance of the residuals.
## see thread at https://stat.ethz.ch/pipermail/r-help/2014-March/367585.html
if (is.finite(resvar) &&
resvar < (as.numeric(Conj(mean(f))*mean(f)) + complexlm::var(c(f))) * 1e-30) # a few times .Machine$double.eps^2
warning("essentially perfect fit: summary may be unreliable")
p1 <- 1L:p
#R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) # Replace this line with the following 4, taken from summary.rlm, since chol2inv() does not work with complex numbers.
R <- Qr$qr
R <- R[1L:p, 1L:p, drop = FALSE] # Trim the bottom of R, making it a square p by p matrix.
R[lower.tri(R)] <- 0 # Remove the lower triangular matrix, the Q from the QR decomposition. Now we just have R, the upper triangular matrix.
rinv <- solve(R) # This is efficient, we only need the diagonal matrix diag(p) to get rinv, so just set rinv <- diag(p) ahead of time. Now rinv is a different p by p matrix.
se <- sqrt( abs(Conj(rinv)*rinv) %*% rep(1,p) * resvar) #### Do I need diag(R) to be squared now? Or something different?
pse <- sqrt(rinv^2 %*% rep(1,p) * respvar)
est <- z$coefficients[Qr$pivot[p1]] # Account for pivot in the QR decomposition
tval <- est/se
ans <- z[c("call", "terms", if(!is.null(z$weights)) "weights")]
ans$residuals <- r
ans$coefficients <-
cbind("Estimate" = est, "Std. Error" = se, "Pseudo Std. Error" = pse, "t value" = tval,
"Pr(>|t|)" = 2*pt(abs(tval), rdf, lower.tail = FALSE))
dimnames(ans$coefficients)[[2]] <- c("Estimate", "Std. Error", "Pseudo Std. Error", "t value", "Pr(>|t|)")
ans$aliased <- is.na(z$coefficients) # used in print method
ans$sigma <- sqrt(resvar)
ans$psigma <- sqrt(respvar)
ans$df <- c(p, rdf, NCOL(Qr$qr))
if (p != attr(z$terms, "intercept")) {
df.int <- if (attr(z$terms, "intercept")) 1L else 0L
ans$r.squared <- mss/(mss + rss)
ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - df.int)/rdf)
ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
numdf = p - df.int, dendf = rdf)
} else ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- rinv %*% t(Conj(rinv)) # Real on diagonal, complex on off-diagonal.
ans$pcov.unscaled <- rinv %*% t(rinv) # pseudo covariance, all complex.
dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,1)]
if (correlation) {
correl <- rinv * array(sqrt(resvar)/se, c(p, p))
ans$correlation <- correl %*% Conj(t(correl))
ans$pseudocorrelation <- correl %*% t(correl)
dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
ans$symbolic.cor <- symbolic.cor
}
if(!is.null(z$na.action)) ans$na.action <- z$na.action
class(ans) <- c("summary.zlm", "summary.lm")
return(ans)
}
## This function is based off of MASS::print.summary.rlm() because stats::print.summary.lm() has many fancy features that call functions which don't play well with complex numbers, namely format.pval().
## At the moment, the quantiles are calculated by discarding the imaginary components, which is not very useful. It could be improved by using one of the definitions of multivariate quantile.
#' @describeIn summary.zlm S3 method for class 'summary.zlm'
#'
#' @param x a 'zlm' object or an 'zlm' summary object. Used for `print.summary.zlm`
#' @param digits the number of digits to include in the printed report, default is three. Used for `print.summary.zlm`
#' @param quantiles logical. Should the (inaccurate) quantiles of the residuals be printed? If `FALSE` [summary.complex] is applied to the residuals instead.
#'
#' @note For complex fits the quantiles reported by this function are based on sorting the real parts of the residuals. They should not be considered reliable..
#'
#' @export
print.summary.zlm <-
function (x, digits = max(3L, getOption("digits") - 3L),
symbolic.cor = x$symbolic.cor, quantiles = FALSE, ...)
{
cll <- match.call()
cat("\nCall: ")
dput(x$call, control=NULL)
resid <- x$residuals
df <- x$df
if (length(x$df) == 1) rdf <- df
else rdf <- df[2L]
print(rdf)
cat(if (!is.null(x$weights) && diff(range(x$weights))) "Weighted ",
"Residuals:\n", sep="")
if(rdf > 5L) {
if(quantiles) {
if(length(dim(resid)) == 2L) {
rq <- apply(Conj(t(resid)), 1L, quantile)
dimnames(rq) <- list(c("Min", "1Q", "Median", "3Q", "Max"),
colnames(resid))
} else {
rq <- quantile(resid)
names(rq) <- c("Min", "1Q", "Median", "3Q", "Max")
}
print(rq, digits = digits, ...)
print("NOTE: Quantiles generated by sorting real components only. Do not rely upon them.")
}
else {
print(summary(resid), digits = digits, ...)
}
} else if(rdf > 0L) {
print(resid, digits = digits, ...)
}
if(nsingular <- df[3L] - df[1L])
cat("\nCoefficients: (", nsingular,
" not defined because of singularities)\n", sep = "")
else cat("\nCoefficients:\n")
print(format(round(x$coefficients, digits = digits)), quote = FALSE, ...)
### Are the t value and p value actually meaningful for complex data? As they stand, probably not.
cat("\nResidual standard error:", format(signif(x$sigma, digits)),
"on", rdf, "degrees of freedom\n")
if(nzchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep="")
if(!is.null(correl <- x$correlation)) {
p <- dim(correl)[2L]
if(p > 1L) {
cat("\nCorrelation of Coefficients:\n")
ll <- lower.tri(correl)
correl[ll] <- format(round(correl[ll], digits))
correl[!ll] <- ""
print(correl[-1L, -p, drop = FALSE], quote = FALSE, digits = digits, ...)
}
}
invisible(x)
}
#' Calculate Variance-Covariance Matrix and Pseudo Variance-Covariance Matrix for a Complex Fitted Model Object
#'
#' A method for of [stats::vcov] that is compatible with complex linear models. In addition to variance-covariance matrix,
#' the pseudo variance-covariance matrix, which is a measure of the covariance between real and imaginary components, is returned as well.
#' Can also return the "double covariance" matrix, which combines the information of the covariance matrix and the pseudo-covariance matrix, as described in (van den Bos 1995).
#' While not as compact as two separate smaller matrices, the double covariance matrix simplifies calculations such as the [mahalanobis] distance.
#'
#'
#' @param object Typically a fitted model object of class "zlm" and/or "rzlm". Sometimes also a summary() object of such a fitted model.
#' @param complete logical. Indicates if the full covariance and pseudo-covariance matrices should be returned even in the case of an over-determined system, meaning that some coefficients are undefined.
#' @param merge logical. Should the covariance matrix and pseudo-covariance / relational matrix be merged into one matrix of twice the dimensions? Default is TRUE.
#' @param ... Additional parameters, not currently used for anything.
#'
#' @return
#' If `merge` is false, a list containing both the numeric variance-covariance matrix, and the complex pseudo variance-covariance matrix.
#' If `merge` is true, a large matrix (both dimensions being twice the number of coefficients) containing both the variance-covariance matrix and the pseudo variance-covariance matrix, merged together.
#' @exportS3Method vcov zlm
#' @export vcov.zlm
#' @export
#'
#' @references A. van den Bos, The Multivariate Complex Normal Distribution-a Generalization, IEEE Trans. Inform. Theory 41, 537 (1995).
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' err <- complex(real = rnorm(n)/16, imaginary = rnorm(n)/16)
#' tframe <- data.frame(x= x <- complex(real=rnorm(n), imaginary= rnorm(n)), y=slop*x + interc+err)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' vcov(fit)
vcov.zlm <- function (object, complete = TRUE, merge = TRUE, ...)
{
so <- summary(object, corr = FALSE)
varcovar <- .vcov.aliased.complex(aliased = so$aliased, vc = so$sigma^2 * so$cov.unscaled, complete = complete)
pseudovarcovar <- .vcov.aliased.complex(aliased = so$aliased, vc = so$psigma^2 * so$pcov.unscaled, complete = complete)
if (merge == TRUE)
{
#bigcovar <- diag(rep(diag(varcovar), each = 2)) # Start by making a square diagonal matrix with two adjacent diagonal elements for each variance.
n <- attributes(varcovar)$dim[1] # The size of the square covariance matrix.
#print(attributes(varcovar)) # Used for debugging
bigcovar <- matrix(0, ncol = 2*n, nrow = 2*n) #Start by making an empty matrix with dimensions twice those of the small covariance matrix.
bigcovar[,seq(1, 2*n, 2)] <- matrix(as.vector(rbind(as.vector(varcovar), as.vector(Conj(pseudovarcovar)))), nrow = 2*n) # Fill the odd indexed columns of bigcovar with the entries from varcovar interleaved with the entries from pseudovarcovar conjugated.
bigcovar[,seq(2, 2*n, 2)] <- matrix(as.vector(rbind(as.vector(pseudovarcovar), as.vector(varcovar))), nrow = 2*n) # Fill the even indexed columns of bigcovar with the entries from varcovar interleaved with the entries from pseudovarcovar, this time the later first.
## The lines above could be replaced with a call to matrixweave.
return(bigcovar)
}
else return(list(varcovar = as.numeric(varcovar), pseudovarcovar = pseudovarcovar))
}
## Copied from stats::vcov , modified to used NA_complex_ instead of NA_real_.
#' @describeIn vcov.zlm auxiliary function for dealing with singular model fits. See [stats::vcov].
#'
#' @param aliased a logical vector typically identical to `is.na(coef(.))` indicating which coefficients are 'aliased'.
#' @param vc a variance-covariance matrix, typically "incomplete", i.e., with no rows and columns for aliased coefficients.
#' @param complete logical. Indicates if the full covariance and pseudo-covariance matrices should be returned even in the case of an over-determined system, meaning that some coefficients are undefined.
#'
#' @export
.vcov.aliased.complex <- function(aliased, vc, complete = TRUE) {
## Checking for "NA coef": "same" code as in print.summary.lm() in ./lm.R :
if(complete && NROW(vc) < (P <- length(aliased)) && any(aliased)) {
## add NA rows and columns in vcov
cn <- names(aliased)
VC <- matrix(NA_complex_, P, P, dimnames = list(cn,cn))
j <- which(!aliased)
VC[j,j] <- vc
VC
} else # default
vc
}
#' ANOVA for Complex Linear Fits
#'
#' A very simple adaptation of [stats::anova.lm] which can handle fits of complex variables.
#' The only change was to take the absolute value of squared residuals, and eliminate quantile based features.
#' Note that this function uses the variance, not the pseudo-variance. An analysis of pseudo-variance (ANOPVA)
#' is also possible (and maybe useful), but not yet implemented.
#'
#' @inherit stats::anova.lm params details return references
#'
#' @param object objects of class "zlm", usually produced by [complexlm::lm].
#' @param ... Other arguments.
#'
#' @return An object of class "anova", which inherits from class "data.frame". Contains a analysis of variance table, except for those components that rely on quantiles.
#' @export
#' @export anova.zlm
#' @exportS3Method anova zlm
#'
#' @seealso [complexlm::lm], [anova]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' err <- complex(real = rnorm(n)/16, imaginary = rnorm(n)/16)
#' tframe <- data.frame(x= x <- complex(real=rnorm(n), imaginary= rnorm(n)), y=slop*x + interc+err)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' anova(fit)
#' robfit <- rlm(y ~ x, data = tframe)
#' anova(fit, robfit)
anova.zlm <- function(object, ...)
{
if(length(list(object, ...)) > 1L) return(anova.zlmlist(object, ...))
if(!inherits(object, "lm")) warning("calling anova.lm(<fake-lm-object>) ...")
w <- object$weights
ssr <- sum(if(is.null(w)) abs(object$residuals^2) else w*abs(object$residuals^2)) # Algebraically equivalent to Conj(residuals)*residuals, though may not be numerically equivalent.
mss <- sum(if(is.null(w)) abs(object$fitted.values^2) else w*abs(object$fitted.values^2))
if(ssr < 1e-10*mss)
warning("ANOVA F-tests on an essentially perfect fit are unreliable")
dfr <- df.residual(object)
p <- object$rank
if(p > 0L) {
p1 <- 1L:p
comp <- object$effects[p1]
# asgn <- object$assign[qr.lm(object)$pivot][p1]
try(asgn <- object$assign[object$qr$pivot][p1], stop("lm object does not have a proper 'qr' component. Rank zero or should not have used lm(.., qr=FALSE).")) # Replicates the behavior of the line above without calling a unexported function.
nmeffects <- c("(Intercept)", attr(object$terms, "term.labels"))
tlabels <- nmeffects[1 + unique(asgn)]
ss <- c(vapply( split(abs(Conj(comp)*comp),asgn), sum, 1.0), ssr)
df <- c(lengths(split(asgn, asgn)), dfr)
} else {
ss <- ssr
df <- dfr
tlabels <- character()
}
ms <- ss/df
f <- ms/(ssr/dfr)
#P <- pf(f, df, dfr, lower.tail = FALSE)
table <- data.frame(df, ss, ms, f)
table[length(df), 4:4] <- NA
dimnames(table) <- list(c(tlabels, "Residuals"),
c("Df","Sum Sq", "Mean Sq", "F value"))
if(attr(object$terms,"intercept")) table <- table[-1, ]
structure(table, heading = c("Analysis of Variance Table\n",
paste("Response:", deparse(formula(object)[[2L]]))),
class = c("anova", "data.frame"))# was tabular
}
#' @describeIn anova.zlm s3 method for class 'zlmlist'
#' @export
anova.zlmlist <- function (object, ..., scale = 0, test = "F")
{
objects <- list(object, ...)
responses <- as.character(lapply(objects,
function(x) deparse(x$terms[[2L]])))
sameresp <- responses == responses[1L]
if (!all(sameresp)) {
objects <- objects[sameresp]
warning(gettextf("models with response %s removed because response differs from model 1",
sQuote(deparse(responses[!sameresp]))),
domain = NA)
}
ns <- sapply(objects, function(x) length(x$residuals))
if(any(ns != ns[1L]))
stop("models were not all fitted to the same size of dataset")
## calculate the number of models
nmodels <- length(objects)
if (nmodels == 1)
return(anova.zlm(object))
## extract statistics
resdf <- as.numeric(lapply(objects, df.residual))
resdev <- as.numeric(lapply(objects, function(x) abs(deviance(x))))
## construct table and title
table <- data.frame(resdf, resdev, c(NA, -diff(resdf)),
c(NA, -diff(resdev)) )
variables <- lapply(objects, function(x)
paste(deparse(formula(x)), collapse="\n") )
dimnames(table) <- list(1L:nmodels,
c("Res.Df", "RSS", "Df", "Sum of Sq"))
title <- "Analysis of Variance Table\n"
topnote <- paste0("Model ", format(1L:nmodels), ": ", variables,
collapse = "\n")
## calculate test statistic if needed
if(!is.null(test)) {
bigmodel <- order(resdf)[1L]
scale <- if(scale > 0) scale else resdev[bigmodel]/resdf[bigmodel]
table <- stat.anova(table = table, test = test,
scale = scale,
df.scale = resdf[bigmodel],
n = length(objects[[bigmodel]]$residuals))
}
structure(table, heading = c(title, topnote),
class = c("anova", "data.frame"))
}
#' Generate the Hat Matrix or Leverage Scores of a Complex Linear Model
#'
#' This function returns either the full hat matrix (AKA the projection matrix) of a complex "lm" or "rlm" object, or the diagonal elements of same.
#' The later are also known as the influence scores.
#' It performs the same basic role as [stats::hat] and [stats::hatvalues] do for numeric fits, but is quite a bit simpler
#' and rather less versatile.
#' \loadmathjax
#'
#' @param model A complex linear fit object, of class "zlm" or "rzlm". An object with numeric residuals will produce a warning and NULL output.
#' @param full Logical. If TRUE, return the entire hat matrix. If FALSE, return a vector of the diagonal elements of the hat matrix. These are the influence scores. Default is FALSE.
#' @param ... Additional arguments. Not used.
#'
#' @details For unweighted least-squares fits the hat matrix is calculated from the model matrix, \eqn{X = }`model$x`, as \cr
#' \mjdeqn{H = X (X^t X)^{-1} X^t}{H = X (X^t X)^{-1} X^t}\cr
#' For rlm or weighted least-squares fits the hat matrix is calculated as\cr
#' \mjdeqn{H = X (X^t W X)^{-1} X^t W}{H = X (X^t W X)^{-1} X^t W}
#' Where \eqn{^t} represents conjugate transpose, and \eqn{W} is the identity matrix times the user provided weights and the final IWLS weights if present. \cr
#' Note that this function requires that the model matrix be returned when calling [lm] or [rlm].\cr
#' The diagonals will be purely real, and are converted to numeric if `full == FALSE`.
#'
#' @return Either a \eqn{(n x n)} complex matrix or a length \eqn{n} numeric vector.
#' @export
#'
#' @seealso [stats::hatvalues], [stats::hat], [cooks.distance]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slop*xx + interc + e)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' zhatvalues(fit)
zhatvalues <- function(model, full = FALSE, ...)
{
if (!is.complex(model$residuals))
{
warning('This is for complex models only. Please use stats::hatvalues() instead.')
return(NULL)
}
X <- model$x
if ((is.null(model$weights) || all(model$weights == 1)) && !inherits(model, 'rlm')) {
# Clever computation with the QR decomposition failed to produce something reasonable. Do it the slow way.
hat <- X %*% solve((Conj(t(X)) %*% X)) %*% Conj(t(X))
}
else {
if (inherits(model, 'rlm')) wmat <- diag(ifelse(is.null(model$weights), 1, model$weights) * model$w) # If rlm, take into account the user supplied weights and the IWLS weights.
else wmat <- diag(model$weights)
hat <- X %*% solve((Conj(t(X)) %*% wmat %*% X)) %*% Conj(t(X)) %*% wmat
}
if (full) return(hat)
else return(Re(diag(hat)))
}
#' Standardized Residuals from Ordinary or Robust Linear fits with Complex Variables
#'
#' Generates a vector of residuals from the given complex linear model that are normalized to have unit variance.
#' Similar to [stats::rstandard], which this function calls if given numeric input.
#'
#' @param model An object of class "zlm", "rzlm", "lm", or "rlm". Can be complex or numeric.
#' @param lever A list of leverage scores with the same length as `model$residuals`. By default [zhatvalues] is called on `model`.
#' @param ... Other parameters. Only used if `model` is numeric; in which case they are passed to `stats::rstandard`.
#'
#' @details The standardized residuals are calculated as,\cr
#' \deqn{r' = r / ( s \sqrt{1 - lever} )}\cr
#' Where \eqn{r} is the residual vector and \eqn{s} is the residual standard error for "zlm" objects
#' or the robust scale estimate for "rzlm" objects.
#'
#' @note This is a much simpler function than [stats::rstandard].
#' It cannot perform leave-one-out cross validation residuals, or anything else not mentioned here.
#'
#' @return A complex vector of length equal to that of the residuals of `model`. Numeric for numeric input.
#' @export
#'
#' @seealso [stats::rstandard], [stats::rstandard.lm],
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slop*xx + interc + e)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' rstandard(fit)
rstandard.zlm <- function(model, lever = zhatvalues(model), ...)
{
cll <- match.call()
if (is.numeric(model$residuals))
{
cll[[1]] <- stats::rstandard
eval(cll, parent.frame())
}
else
{
p <- model$rank
s <- if (inherits(model, "rlm")) model$s # This is the residual standard error.
else sqrt(abs(deviance(model))/df.residual(model)) # deviance is the sum of residuals squared. The abs() gives us sum (r*r), which is numeric.
return((model$residuals) / (s * sqrt(1 - lever)))
}
}
#' Cook's Distance for Complex Linear Models
#'
#' Calculates the Cook's distances (technically a divergence, i.e. distance squared) of a complex linear model.
#' These serve as a measurement of how much each input data point had on the model.\loadmathjax
#'
#' @param model An object of class "lm" or "rlm". Can be complex or numeric.
#' @param lever A list of leverage scores with the same length as `model$residuals`. By default [zhatvalues] is called on `model`.
#' @param ... Other parameters. Only used if `model` is numeric; in which case they are passed to `stats::cooks.distance`.
#'
#' @details Consider a linear model relating a response vector `y` to a predictor vector `x`, both of length `n`. Using the model and predictor vector we can
#' calculate a vector of predicted values `yh`. `y` and `yh` are points in a `n` dimensional output space. If we drop the `i`-th element of `x` and `y`, then fit another
#' model using the "dropped `i`" vectors, we can get another point in output space, `yhi`. The squared Euclidean distance between `yh` and `yhi`, divided by the
#' rank of the model (`p`) times its mean squared error `s^2`, is the `i`-th Cook's distance.\cr
#' \mjdeqn{D_i = (yh - yhi)^t (yh - yhi) / p s^2}{D_i = (yh - yhi)^t (yh - yhi) / p s^2}\cr
#' A more elegant way to calculate it, which this function uses, is with the influence scores, `hii`.\cr
#' \mjdeqn{D_i = |r_i|^2 / p s^2 hii / (1 - hii)}{D_i = |r_i|^2 / p s^2 hii / (1 - hii)}\cr
#' Where \eqn{r_i} is the \eqn{i}-th residual, and \eqn{^t} is the conjugate transpose.
#'
#' @note This is a simpler function than [stats::cooks.distance], and does not understand any additional parameters not listed in this entry.
#'
#' @references R. D. Cook, Influential Observations in Linear Regression, Journal of the American Statistical Association 74, 169 (1979).
#'
#' @return A numeric vector. The elements are the Cook's distances of each data point in `model`.
#' @export
#'
#' @seealso [stats::cooks.distance], [zhatvalues]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slop*xx + interc + e)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' cooks.distance(fit)
cooks.distance.zlm <- function(model, lever = zhatvalues(model), ...)
{
cll <- match.call()
if (is.numeric(model$residuals))
{
#print("theuano")
cll[[1]] <- stats::cooks.distance
eval(cll, parent.frame())
}
else
{
p <- model$rank
s <- if (inherits(model, "rlm")) model$s # This is the residual standard error.
else sqrt(abs(deviance(model))/df.residual(model)) # deviance is the sum of residuals squared. The abs() gives us sum (r*r), which is numeric.
return(as.numeric((Conj(model$residuals) * model$residuals) / (p * s^2)) * (lever / (1 - lever)^2)) # As a distance, this will be positive real.
}
}
#' Plot Diagnostics for a Complex Linear Model Objects
#'
#' A modified version of [stats::plot.lm] used for visualizing ordinary ("zlm") and robust ("rzlm")
#' linear models of complex variables. This documentation entry describes the complex version, focusing on the
#' differences and changes from the numeric. For further explanation of the plots please see [stats::plot.lm].
#'
#' @inherit stats::plot.lm params
#'
#' @param x complex lm object ("zlm" or "rzlm"). Typically produced by [complexlm::lm] or [complexlm::rlm].
#' @param which If a subset of the plots is required, specify a subset of the numbers 1:6, except 2. See [stats::plot.lm], and below, for the different kinds. Default is c(1,3,5).
#'
#' @details Five of the six plots generated by [stats::plot.lm] can be produced by this function:
#' The residuals vs. fitted values plot, the scale-location plot, the plot of Cook's distances vs. row labels,
#' the plot of residuals vs. leverages, and the plot of Cook's distances vs. leverage/(1-leverage). The Q-Q plot is
#' \emph{not} drawn because it requires quantiles, which are not unambiguously defined for complex numbers.
#' Because complex numbers are two dimensional, [pairs] is used to create multiple scatter plots of the real
#' and imaginary components for the residuals vs. fitted values and scale-location plots.
#'
#' @return Several diagnostic plots.
#' @export
#'
#' @references
#' Belsley, D. A., Kuh, E. and Welsch, R. E. (1980). Regression Diagnostics. New York: Wiley.
#'
#' Cook, R. D. and Weisberg, S. (1982). Residuals and Influence in Regression. London: Chapman and Hall.
#'
#' Firth, D. (1991) Generalized Linear Models. In Hinkley, D. V. and Reid, N. and Snell, E. J., eds: Pp. 55-82 in Statistical Theory and Modelling. In Honour of Sir David Cox, FRS. London: Chapman and Hall.
#'
#' Hinkley, D. V. (1975). On power transformations to symmetry. Biometrika, 62, 101-111. doi: 10.2307/2334491.
#'
#' McCullagh, P. and Nelder, J. A. (1989). Generalized Linear Models. London: Chapman and Hall.
#'
#' @seealso [zhatvalues], [cooks.distance], [complexlm::lm], [complexlm::rlm]
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slop <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slop*xx + interc + e)
#' fit <- lm(y ~ x, data = tframe, weights = rep(1,n))
#' plot(fit)
plot.zlm <- function(x, which = c(1,3,5), ## was which = 1L:4L,
caption = list("Residuals vs Fitted",
"Scale-Location", "Cook's distance",
"Residuals vs Leverage",
expression("Cook's dist vs Leverage " * h[ii] / (1 - h[ii]))),
panel = if(add.smooth) function(x, y, ...)
panel.smooth(x, y, iter=iter.smooth, ...) else points,
sub.caption = NULL, main = "",
ask = prod(par("mfcol")) < length(which) && dev.interactive(), ...,
id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75,
cook.levels = c(0.5, 1.0),
add.smooth = getOption("add.smooth"),
iter.smooth = if(isGlm # && binomialLike
) 0 else 3,
label.pos = c(4,2), cex.caption = 1, cex.oma.main = 1.25) ## Looks like I can do the leverage, the scale-location, and the residuaals vs fitted.
{
dropInf <- function(x, h) {
if(any(isInf <- h >= 1.0)) {
warning(gettextf("not plotting observations with leverage one:\n %s",
paste(which(isInf), collapse=", ")),
call. = FALSE, domain = NA)
x[isInf] <- NaN
}
x
}
if (!inherits(x, "lm"))
stop("use only with \"lm\" objects")
if(!is.numeric(which) || any(which < 1) || any(which > 6))
stop("'which' must be in 1:6")
if((isGlm <- inherits(x, "glm")))
binomialLike <- family(x)$family == "binomial" # || "multinomial" (maybe)
show <- rep(FALSE, 6)
show[which] <- TRUE
r <- if(isGlm) residuals(x, type="pearson") else residuals(x)
yh <- predict(x) # != fitted() for glm
w <- weights(x)
if(!is.null(w)) { # drop obs with zero wt: PR#6640
wind <- w != 0
r <- r[wind]
yh <- yh[wind]
w <- w[wind]
labels.id <- labels.id[wind]
}
n <- length(r)
if (any(show[2L:6L])) {
s <- if (inherits(x, "rlm")) x$s
else if(isGlm) sqrt(summary(x)$dispersion)
else sqrt(deviance(x)/df.residual(x)) # deviance is the sum of residuals squared.
hii <- zhatvalues(x) # calculate the influence scores.
if (any(show[4L:6L])) {
cook <- cooks.distance(x, lever = hii)
## cook <-
## if (isGlm)
## cooks.distance (x, infl = infl)
## else cooks.distance(x, infl = infl, sd = s, res = r, hat = hii)
}
}
if (any(show[c(2L,3L,5L)])) {
## (Defensive programming used when fusing code for 2:3 and 5)
ylab5 <- ylab23 <- if(isGlm) "Std. Pearson resid." else "Standardized residuals"
r.w <- if (is.null(w)) r else sqrt(w) * r
## NB: rs is already NaN if r=0, hii=1
rsp <- rs <- dropInf( if (isGlm) rstandard(x, type="pearson") else r.w/(s * sqrt(1 - hii)), hii )
}
if (any(show[5L:6L])) { # using 'leverages'
r.hat <- range(hii, na.rm = TRUE) # though should never have NA
isConst.hat <- all(r.hat == 0) ||
diff(r.hat) < 1e-10 * mean(hii, na.rm = TRUE)
}
if (any(show[c(1L, 3L)]))
l.fit <- if (isGlm) "Predicted values" else "Fitted values"
if (is.null(id.n))
id.n <- 0
else {
id.n <- as.integer(id.n)
if(id.n < 0L || id.n > n)
stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA)
}
if(id.n > 0L) { ## label the largest residuals
if(is.null(labels.id))
labels.id <- paste(1L:n)
iid <- 1L:id.n
show.r <- sort.list(abs(median(r) - r), decreasing = TRUE)[iid] # sort based on distance from median.
if(any(show[2L:3L]))
show.rs <- sort.list(abs(median(rs) - rs), decreasing = TRUE)[iid] # sort based on distance from the median.
text.id <- function(x, y, ind, adj.x = TRUE) {
labpos <-
if(adj.x) label.pos[1+as.numeric(abs(x) > abs(mean(range(x))))] else 3
text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE,
pos = labpos, offset = 0.25)
}
}
getCaption <- function(k) # allow caption = "" , plotmath etc
if(length(caption) < k) NA_character_ else as.graphicsAnnot(caption[[k]])
if(is.null(sub.caption)) { ## construct a default:
cal <- x$call
if (!is.na(m.f <- match("formula", names(cal)))) {
cal <- cal[c(1, m.f)]
names(cal)[2L] <- "" # drop " formula = "
}
cc <- deparse(cal, 80) # (80, 75) are ``parameters''
nc <- nchar(cc[1L], "c")
abbr <- length(cc) > 1 || nc > 75
sub.caption <-
if(abbr) paste(substr(cc[1L], 1L, min(75L, nc)), "...") else cc[1L]
}
one.fig <- prod(par("mfcol")) == 1
if (ask) {
oask <- devAskNewPage(TRUE)
on.exit(devAskNewPage(oask))
}
##---------- Do the individual plots : ----------
if (show[1L]) { # Residuals vs. Fitted Values
# ylim <- range(Im(r), na.rm=TRUE)
# xlim <- range(Re(r), na.rm=TRUE)
# if(id.n > 0) {
# ylim <- extendrange(r = ylim, f = 0.08)
# xlim <- extendrange(r = xlim, f = 0.08)
# }
## pairs seems to do a good job of setting the limits.
resfitdf <- data.frame(rer = Re(r), imr = Im(r), reyh = Re(yh), imyh = Im(yh))
dev.hold()
#plot(yh, r, xlab = l.fit, ylab = "Residuals", main = main,
# ylim = ylim, type = "n", ...)
plot(resfitdf, labels = c("Re(Residuals)", "Im(Residuals)", "Re(Fitted Values)", "Im(Fitted Values)"), panel = panel) # Uses pairs to draw a matrix of scatterplots.
#panel(yh, r, ...)
if (one.fig)
title(sub = sub.caption, ...)
mtext(getCaption(1), side = 3, line = 2.8, cex = cex.caption)
# if(id.n > 0) { # This won't look right with the pairs scatterplot matrix.
# y.id <- r[show.r] # arrange r decreasing distance from median.
# y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3
# text.id(yh[show.r], y.id, show.r)
# }
abline(h = 0, lty = 3, col = "gray")
dev.flush()
}
# if (show[2L]) { ## Normal # Q-Q plot, not available without definition of complex quantile.
# ylim <- range(rs, na.rm=TRUE)
# ylim[2L] <- ylim[2L] + diff(ylim) * 0.075
# dev.hold()
# qq <- qqnorm(rs, main = main, ylab = ylab23, ylim = ylim, ...)
# if (qqline) qqline(rs, lty = 3, col = "gray50")
# if (one.fig)
# title(sub = sub.caption, ...)
# mtext(getCaption(2), 3, 0.25, cex = cex.caption)
# if(id.n > 0)
# text.id(qq$x[show.rs], qq$y[show.rs], show.rs)
# dev.flush()
# }
if (show[3L]) { # fitted values vs. sqrt abs standardized residuals. AKA Scale-Location Plot
sqrtabsr <- sqrt(abs(rs))
#ylim <- c(0, max(sqrtabsr, na.rm=TRUE))
yl <- as.expression(substitute(sqrt(abs(YL)), list(YL=as.name(ylab23))))
yhn0 <- if(is.null(w)) yh else yh[w!=0]
dev.hold()
scalocdf <- data.frame(sqrtabsr, reyhn0 = Re(yhn0), imyhn0 = Im(yhn0))
plot(scalocdf, labels = c(yl, "Re(Fitted Values)", "Im(Fitted Values)"), panel = panel)
if (one.fig)
title(sub = sub.caption, ...)
mtext(getCaption(2), side = 3, line = 2.8, cex = cex.caption)
#if(id.n > 0)
# text.id(yhn0[show.rs], sqrtabsr[show.rs], show.rs) # I don't think this would work well in pairs().
dev.flush()
}
if (show[4L]) { ## Cook's Distances
if(id.n > 0) {
show.r <- order(-cook)[iid]# index of largest 'id.n' ones
ymx <- cook[show.r[1L]] * 1.075
} else ymx <- max(cook, na.rm = TRUE)
dev.hold()
plot(cook, type = "h", ylim = c(0, ymx), main = main,
xlab = "Obs. number", ylab = "Cook's distance", ...) # What is the x axis here..? Oh, just an index.
if (one.fig)
title(sub = sub.caption, ...)
mtext(getCaption(3), side = 3, line = 0.25, cex = cex.caption)
if(id.n > 0)
text.id(show.r, cook[show.r], show.r, adj.x=FALSE)
dev.flush()
}
if (show[5L]) {
### Now handled earlier, consistently with 2:3, except variable naming
## ylab5 <- if (isGlm) "Std. Pearson resid." else "Standardized residuals"
## r.w <- residuals(x, "pearson")
## if(!is.null(w)) r.w <- r.w[wind] # drop 0-weight cases
## rsp <- dropInf( r.w/(s * sqrt(1 - hii)), hii )
ylim <- range(rsp, na.rm = TRUE)
ylim <- c(Re(ylim), Im(ylim))
ylim <- c(min(ylim), max(ylim)) # The smallest and largest value of real or imaginary rsp.
if (id.n > 0) {
ylim <- extendrange(r = ylim, f = 0.08)
show.rsp <- order(-cook)[iid]
}
do.plot <- TRUE
if(isConst.hat) { ## leverages are all the same
if(missing(caption)) # set different default
caption[[4L]] <- "Constant Leverage:\n Residuals vs Factor Levels"
## plot against factor-level combinations instead
aterms <- attributes(terms(x))
## classes w/o response
dcl <- aterms$dataClasses[ -aterms$response ]
facvars <- names(dcl)[dcl %in% c("factor", "ordered")]
mf <- model.frame(x)[facvars]# better than x$model
if(ncol(mf) > 0) {
dm <- data.matrix(mf)
## #{levels} for each of the factors:
nf <- length(nlev <- unlist(unname(lapply(x$xlevels, length))))
ff <- if(nf == 1) 1 else rev(cumprod(c(1, nlev[nf:2])))
facval <- (dm-1) %*% ff
xx <- facval # for use in do.plot section.
dev.hold()
plot(facval, Re(rsp), xlim = c(-1/2, sum((nlev-1) * ff) + 1/2),
ylim = ylim, xaxt = "n",
main = main, xlab = "Factor Level Combinations",
ylab = ylab5, type = "n", ...)
#points(facval, Im(rsp), type = 'n', pch = 5) # Redundant.
axis(1, at = ff[1L]*(1L:nlev[1L] - 1/2) - 1/2,
labels = x$xlevels[[1L]])
mtext(paste(facvars[1L],":"), side = 1, line = 0.25, adj=-.05)
abline(v = ff[1L]*(0:nlev[1L]) - 1/2, col="gray", lty="F4")
panel(facval, Re(rsp), col.smooth = "black", ...)
panel(facval, Im(rsp), col = "blue", pch = 5, col.smooth = "blue", ...)
abline(h = 0, lty = 3, col = "gray")
legend("bottomleft", legend = c("Real", "Imaginary"),
lty = c(1,1), col = c("black", "blue"), pch = c(1, 5) , bty = "n")
dev.flush()
}
else { # no factors
message(gettextf("hat values (leverages) are all = %s\n and there are no factor predictors; no plot no. 5",
format(mean(r.hat))),
domain = NA)
frame()
do.plot <- FALSE
}
}
else { ## Residual vs Leverage # This plot might be cool as a 3d plot with the Cook's distance isos as surfaces of rotation, I don't know how to draw one though.
xx <- hii
## omit hatvalues of 1.
xx[xx >= 1] <- NA
dev.hold()
plot(xx, Re(rsp), xlim = c(0, max(xx, na.rm = TRUE)), ylim = ylim,
main = main, xlab = "Leverage", ylab = ylab5, type = "n",
...)
#points(xx, Im(rsp), type = 'n', pch = 5, col = "blue") # Extraneous?
panel(xx, Re(rsp), col.smooth = "black", ...) # real
panel(xx, Im(rsp), col = "blue", pch = 5, col.smooth = "blue", ...) # imaginary
panel(xx, Mod(rsp), col = "purple", pch = 0, col.smooth = "purple", ...) # Modulus / magnitude
abline(h = 0, v = 0, lty = 3, col = "gray")
if (one.fig)
title(sub = sub.caption, ...)
if(length(cook.levels)) { # Draws the Cook's distance iso-lines.
p <- x$rank # not length(coef(x))
usr <- par("usr")
hh <- seq.int(min(r.hat[1L], r.hat[2L]/100), usr[2L],
length.out = 101)
for(crit in cook.levels) {
cl.h <- sqrt(crit*p*(1-hh)/hh)
lines(hh, cl.h, lty = 2, col = 2)
lines(hh,-cl.h, lty = 2, col = 2)
}
legend("bottomleft", legend = c("Cook's distance", "Real", "Imaginary", "Modulus"),
lty = c(2,1,1), col = c("red", "black", "blue", "purple"), pch = c(NA_integer_, 1, 5, 0) , bty = "n")
xmax <- min(0.99, usr[2L])
ymult <- sqrt(p*(1-xmax)/xmax)
aty <- sqrt(cook.levels)*ymult
axis(4, at = c(-rev(aty), aty),
labels = paste(c(rev(cook.levels), cook.levels)),
mgp = c(.25,.25,0), las = 2, tck = 0,
cex.axis = cex.id, col.axis = 2)
}
dev.flush()
} # if(const h_ii) .. else ..
if (do.plot) {
mtext(getCaption(4), side = 3, line = 0.25, cex = cex.caption)
if (id.n > 0) {
y.id <- rsp[show.rsp]
y.id[(Re(y.id) < 0 | Im(y.id) < 0)] <- y.id[(Re(y.id) < 0 | Im(y.id) < 0)] - strheight(" ")/3
text.id(xx[show.rsp], Re(y.id), show.rsp)
text.id(xx[show.rsp], Im(y.id), show.rsp)
}
}
}
if (show[6L]) { # Secret plot? seems to plot Cook's distance vs a kind of leverage score.
g <- dropInf( hii/(1-hii), hii )
ymx <- max(cook, na.rm = TRUE)*1.025
dev.hold()
plot(g, cook, xlim = c(0, max(g, na.rm=TRUE)), ylim = c(0, ymx),
main = main, ylab = "Cook's distance",
xlab = expression("Leverage " * h[ii]),
xaxt = "n", type = "n", ...)
panel(g, cook, ...)
## Label axis with h_ii values
athat <- pretty(hii)
axis(1, at = athat/(1-athat), labels = paste(athat))
if (one.fig)
title(sub = sub.caption, ...)
## Draw pretty "contour" lines through origin and label them
p <- x$rank
bval <- pretty(sqrt(p*cook/g), 5)
usr <- par("usr")
xmax <- usr[2L]
ymax <- usr[4L]
for(i in seq_along(bval)) {
bi2 <- bval[i]^2
if(p*ymax > bi2*xmax) {
xi <- xmax + strwidth(" ")/3
yi <- bi2*xi/p
abline(0, bi2, lty = 2)
text(xi, yi, paste(bval[i]), adj = 0, xpd = TRUE)
} else {
yi <- ymax - 1.5*strheight(" ")
xi <- p*yi/bi2
lines(c(0, xi), c(0, yi), lty = 2)
text(xi, ymax-0.8*strheight(" "), paste(bval[i]),
adj = 0.5, xpd = TRUE)
}
}
## axis(4, at=p*cook.levels, labels=paste(c(rev(cook.levels), cook.levels)),
## mgp=c(.25,.25,0), las=2, tck=0, cex.axis=cex.id)
mtext(getCaption(5), side = 3, line = 0.25, cex = cex.caption)
if (id.n > 0) {
show.r <- order(-cook)[iid]
text.id(g[show.r], cook[show.r], show.r)
}
dev.flush()
}
if (!one.fig && par("oma")[3L] >= 1)
mtext(sub.caption, outer = TRUE, cex = cex.oma.main)
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.