# file complexlm/R/rlm.R
# copyright (C) 2020-2023 W. L. Ryan
# copyright (C) 1994-2020 W. N. Venables and B. D. Ripley
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 or 3 of the License
# (at your option).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
#
#@useDynLib complexlm, .registration = TRUE # Not needed since lqs.R functions moved to experimental branch.
#' Robust Fitting of Linear Models, Compatible with Complex Variables
#'
#' Uses robust M-estimation to fit a linear model to numeric or complex data. Based on [MASS::rlm].
#'
#' @inherit MASS::rlm details references
#'
#' @details M-estimation works by finding the model coefficients that minimize the sum of a function of the residuals.
#' This function, called the objective function rho(), is a kind of statistical distance (also known as a divergence), and a semimetric.
#' As a semimetric it is a function of the measured value `y` and the modeled value `Y` (residual \eqn{r = y - Y}) which maps from
#' the space of the data to the non-negative real numbers. Semimetrics can be defined for domains of any dimensionality, including the
#' two-dimensional complex numbers, and thus so can M-estimation.
#' What's more, all the standard algebraic operations used in the iteratively (re)weighted least-squares (IWLS) M-estimator robust regression
#' algorithm are defined over the set of complex numbers. While ordering is not defined for the complex numbers themselves, it is the output of rho(), a real number, that must be
#' ordered (minimized) in M-estimation.
#'
#' @return An object of class `c("rzlm", "zlm", "rlm", "lm")`, or for numeric data `c("rlm", "lm")`.
#'
#' Objects of class "rzlm" are lists with the same components as "zlm" objects, as well as,
#' \item{`df.residual`}{`NA`. For "rzlm" objects the residual degrees of freedom are always set to `NA` in order to avoid estimation of the residual scale by "zlm" or "lm" methods.}
#' \item{`s`}{The robust scale estimate used.}
#' \item{`w`}{The weights used in the IWLS process.}
#' \item{`psi`}{The psi (iterative weighting) function with parameters substituted in.}
#' \item{`conv`}{The value of the convergence criterion at each iteration.}
#' \item{`converged`}{Did the IWLS process converge?}
#' \item{`wresid`}{The 'working residuals': the residuals from the final re-weighted least-squares fit. They are weighted by `weights` if "inv.var" weights were used.}
#'
#' See [MASS::rlm] for a description of "rlm" objects.
#'
#' @export
rlm <- function(x, ...) UseMethod("rlm")
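## Illustrative sketch of the idea in the details above: the Huber objective rho()
## maps complex residuals to non-negative real numbers, so "minimize the total rho"
## is well defined even though the complex numbers themselves cannot be ordered.
## The helper name rho_huber is hypothetical (not part of the package); the block is
## wrapped in if (FALSE) so it is never run when the package is loaded.
if (FALSE) {
    rho_huber <- function(r, k = 1.345) {
        a <- abs(r)  # modulus of the complex residual: a real, non-negative number
        ifelse(a <= k, 0.5 * a^2, k * a - 0.5 * k^2)
    }
    r <- complex(real = c(0.1, -2, 0.3), imaginary = c(0.2, 3, -0.1))
    sum(rho_huber(r))  # a single real number, which M-estimation minimizes over the coefficients
}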
#' @describeIn rlm S3 method for class 'formula'
#'
#' @param formula a [formula] object of the form y ~ x1 + x2. Note that algebraic expressions in formula cannot currently be used with complex data.
#' @param data a data frame containing the variables upon which a robust fit is to be applied.
#' @param weights numeric. A vector of weights to apply to the residuals.
#' @param ... additional arguments passed on to the functions called by this one (rlm.complex and the psi function).
#' @param subset an index vector specifying the cases (rows of data or x and y) to be used for fitting.
#' @param na.action a function that specifies what to do if NAs are found in the fitting data. The default is to omit them via [na.omit]. Can also be changed by [options] (na.action =).
#' @param method string. What method of robust estimation should be used. Options are "M", "MM", or "model.frame". The default is M-estimation. MM-estimation has a high breakdown point but is not compatible with complex variables or case weights. model.frame just constructs the model frame, and only works with the formula method.
#' @param wt.method string, either "inv.var" or "case". Specifies whether the weights are case weights that give the relative importance of each observation / case (higher weight means more important), or the inverse variances of the cases (higher weight means that observation is less variable / uncertain).
#' @param model logical. Should the model frame be included in the returned object?
#' @param x.ret logical. Should the model (design) matrix be included in the returned object?
#' @param y.ret logical. Should the response be included in the returned object?
#' @param contrasts optional contrast specifications: see [stats::lm]. Not compatible with complex data.
#'
#' @export
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slope <- complex(real = 4.23, imaginary = 2.323)
#' interc <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' xx <- complex(real= rnorm(n), imaginary= rnorm(n))
#' tframe <- data.frame(x = xx, y= slope*xx + interc + e)
#' rlm(y ~ x, data = tframe, weights = rep(1,n))
rlm.formula <-
function(formula, data, weights, ..., subset, na.action,
method = c("M", "MM", "model.frame"),
wt.method = c("inv.var", "case"),
model = TRUE, x.ret = TRUE, y.ret = FALSE, contrasts = NULL)
{
trms <- terms(x = formula, data = data)
respname <- as.character(attr(trms, "variables")[[attr(trms, "response") + 1]])
cl <- match.call()
if (!is.complex(data[[respname]])) # Now compatible with tibble input.
{
cl[[1]] <- MASS::rlm
eval(cl, parent.frame())
}
else
{
mf <- match.call(expand.dots = FALSE)
mf$method <- mf$wt.method <- mf$model <- mf$x.ret <- mf$y.ret <- mf$contrasts <- mf$... <- NULL
mf[[1L]] <- quote(stats::model.frame) # Works with complex numbers.
mf <- eval.parent(mf)
method <- match.arg(method)
wt.method <- match.arg(wt.method)
if(method == "model.frame") return(mf)
mt <- attr(mf, "terms")
y <- model.response(mf) # Also works with complex numbers.
offset <- model.offset(mf) ## Note: an offset is not the same thing as an intercept. An offset is a term with a known, fixed coefficient, i.e. a known contribution to the response, which is subtracted from the response before fitting.
if(!is.null(offset)) y <- y - offset
if(!is.null(contrasts)) warning("Contrasts are not supported for complex fits at the moment.")
x <- zmodel.matrix(mt, mf)
xvars <- as.character(attr(mt, "variables"))[-1L]
if ((yvar <- attr(mt, "response")) > 0L)
xvars <- xvars[-yvar]
xlev <- if (length(xvars) > 0L) {
xlev <- lapply(mf[xvars], levels)
xlev[!sapply(xlev, is.null)]
}
weights <- model.weights(mf)
if(!length(weights)) weights <- rep(1, nrow(x))
fit <- rlm.complex(x, y, weights, method = method,
wt.method = wt.method, ...)
fit$terms <- mt
## fix up call to refer to the generic, but leave arg name as `formula'
cl <- match.call()
cl[[1L]] <- as.name("rlm")
fit$call <- cl
fit$contrasts <- attr(x, "contrasts")
fit$xlevels <- .getXlevels(mt, mf)
fit$na.action <- attr(mf, "na.action")
if(model) fit$model <- mf
if(!x.ret) fit$x <- NULL
if(y.ret) fit$y <- y
## change in 7.3-52 suggested by Andr\'e Gillibert
fit$offset <- offset
if (!is.null(offset)) fit$fitted.values <- fit$fitted.values + offset
return(fit)
}
}
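## Dispatch sketch for the method above, assuming MASS is installed: a numeric
## response is handed back to MASS::rlm unchanged, while a complex response is
## fitted by rlm.complex below. Wrapped in if (FALSE) so it is never run at load.
if (FALSE) {
    set.seed(4242)
    n <- 8
    dreal <- data.frame(x = rnorm(n), y = rnorm(n))
    dcplx <- data.frame(x = complex(real = rnorm(n), imaginary = rnorm(n)),
                        y = complex(real = rnorm(n), imaginary = rnorm(n)))
    class(rlm(y ~ x, data = dreal))  # "rlm" "lm": fitted by MASS::rlm
    class(rlm(y ~ x, data = dcplx))  # "rzlm" "zlm" "rlm" "lm": fitted by rlm.complex
}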
#' @describeIn rlm Complex Default S3 method
#'
#' @param x numeric or complex. A matrix, dataframe, or vector containing the explanatory / independent / predictor variables.
#' @param y numeric or complex. A vector containing the dependent / response variables, the same length as x.
#' @param ... additional arguments to be passed to rlm.default or to the psi function.
#' @param w (optional) initial down-weighting for each case
#' @param init (optional) initial values for the coefficients OR a method to find initial values OR the result of a fit with a coef component. Known methods are "ls" (the default) for an initial least-squares fit using weights w*weights, and "lts" for an unweighted least-trimmed squares fit with 200 samples.
#' @param psi the psi function is specified by this argument. It must give (possibly by name) a function g(x, ..., deriv) that for deriv=0 returns psi(x)/x and for deriv=1 returns psi'(x). Tuning constants will be passed in via ...
#' @param scale.est method of scale estimation: re-scaled MAD of the residuals (default) or Huber's proposal 2 (which can be selected by either "Huber" or "proposal 2"). Only MAD is implemented for complex variables.
#' @param k2 tuning constant used for Huber proposal 2 scale estimation.
#' @param maxit maximum number of IWLS iterations.
#' @param acc the accuracy for the stopping criterion.
#' @param test.vec the stopping criterion is based on changes in this vector.
#' @param lqs.control An optional list of control values for [lqs], ignored.
#' @param interc TRUE or FALSE, default is FALSE. Used with rlm.default when fitting complex valued data. If true, a y-intercept is calculated during fitting. Otherwise, the intercept is set to zero.
#'
#' @export
#' @examples
#' set.seed(4242)
#' n <- 8
#' slope <- complex(real = 4.23, imaginary = 2.323)
#' intercept <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' x <- complex(real = rnorm(n), imaginary = rnorm(n))
#' y <- slope * x + intercept + e
#' rlm(x = x, y = y, weights = rep(1,n), interc = TRUE)
rlm.complex <-
function(x, y, weights, ..., w = rep(1, nrow(x)),
init = "ls", psi = psi.huber,
scale.est = c("MAD", "Huber", "proposal 2"), k2 = 1.345,
method = c("M", "MM"), wt.method = c("inv.var", "case"),
maxit = 20, acc = 1e-4, test.vec = "resid", lqs.control=NULL, interc=FALSE)
{
thiscall <- match.call()
if (is.complex(x))
{
if (!is.matrix(x) && interc) {
xx <- as.matrix(data.frame(rep(1, length(x)), x))
attr(xx, "dimnames") <- list(as.character(1:length(x)), c("(intercept)", deparse(substitute(x))))
attr(xx, "assign") <- c(0,1)
x <- xx
}
irls.delta <- function(old, new) {
as.numeric(sqrt(sum(Conj(old - new)*(old - new))/max(1e-20, as.numeric(sum(Conj(old)*old)))))
}
irls.rrxwr <- function(x, w, r)
{
w <- sqrt(w)
max(abs((matrix(r * w, 1L, length(r)) %*% x)/
sqrt(matrix(w, 1L, length(r)) %*% (x^2))))/abs(sqrt(sum(w * r^2)))
# The matrix products above give 1 x ncol(x) matrices, one entry per column of x;
# max() takes the largest of those per-column values, giving a single convergence number.
}
# wmad function used to be here. Moved up in rank to a user accessible function.
method <- match.arg(method)
wt.method <- match.arg(wt.method)
nmx <- deparse(substitute(x))
if(is.null(dim(x))) {
x <- as.matrix(x)
colnames(x) <- nmx
} else x <- as.matrix(x)
if(is.null(colnames(x)))
colnames(x) <- paste("X", seq(ncol(x)), sep="")
if(qr(x)$rank < ncol(x))
stop("'x' is singular: singular fits are not implemented in 'rlm'")
if(!(any(test.vec == c("resid", "coef", "w", "NULL"))
|| is.null(test.vec))) stop("invalid 'test.vec'")
## deal with weights
xx <- x
yy <- y
if(!missing(weights)) {
if(length(weights) != nrow(x))
stop("length of 'weights' must equal number of observations")
if(any(weights < 0)) stop("negative 'weights' value")
if(wt.method == "inv.var") {
fac <- sqrt(weights)
y <- y*fac; x <- x* fac
wt <- NULL
} else {
w <- w * weights
wt <- weights
}
} else wt <- NULL
if(method == "M") {
scale.est <- match.arg(scale.est)
if(!is.function(psi)) psi <- get(psi, mode="function")
## match any ... args to those of psi.
arguments <- list(...)
if(length(arguments)) {
pm <- pmatch(names(arguments), names(formals(psi)), nomatch = 0L)
if(any(pm == 0L)) warning("some of ... do not match")
pm <- names(arguments)[pm > 0L]
formals(psi)[pm] <- unlist(arguments[pm])
}
if(is.character(init)) {
temp <- if(init == "ls") zlm.wfit(x, y, w, method="qr")
else if(init == "lts") {
if(is.null(lqs.control)) lqs.control <- list(nsamp=200L)
do.call("lqs", c(list(x, y, intercept = FALSE), lqs.control))
} else stop("'init' method is unknown")
coef <- temp$coefficients
resid <- temp$residuals
} else {
if(is.list(init)) coef <- init$coef
else coef <- init
resid <- drop(y - x %*% coef)
}
} else if(method == "MM") {
stop("'method' = 'MM' is not supported for complex numbers.
MM-estimation requires that quantiles be defined over the domain and co-domain of the model.
Please use 'method' = 'M' instead. If you require S-estimation or MM-estimation, and have a good complex quantile function you would like to use,
please contact the developer.")
# scale.est <- "MM"
# temp <- do.call("lqs",
# c(list(x, y, intercept = interc, method = "S",
# k0 = 1.548), lqs.control))
# coef <- temp$coefficients
# resid <- temp$residuals
# psi <- psi.bisquare
# if(length(arguments <- list(...)))
# if(match("c", names(arguments), nomatch = 0L)) {
# c0 <- arguments$c
# if (c0 > 1.548) formals(psi)$c <- c0
# else
# warning("'c' must be at least 1.548 and has been ignored")
# }
# scale <- temp$scale
} else stop("'method' is unknown")
done <- FALSE
conv <- NULL
n1 <- (if(is.null(wt)) nrow(x) else sum(wt)) - ncol(x)
#theta <- 2*pnorm(k2) - 1 # pnorm() gives CDF of a normal distribution at k2 quantile. Do I need a complex normal pnorm()? Does such a thing exist? Maybe I should just ditch Huber's proposal 2...
#gamma <- theta + k2^2 * (1 - theta) - 2 * k2 * dnorm(k2) # dnorm gives density of normal distribution at quantile k2. Shoot.
## At this point the residuals are weighted for inv.var and
## unweighted for case weights. Only Huber handles case weights
## correctly.
residdf <- data.frame(Re(resid), Im(resid))
if(scale.est != "MM")
scale <- if(is.null(wt)) {
median(abs(resid - 0))/0.6745 # Median absolute deviation centered on zero (not on the residuals' own median, which need not be zero), following MASS::rlm.
} else wmedian(abs(resid - 0), wt)/0.6745
for(iiter in 1L:maxit) {
if(!is.null(test.vec)) testpv <- get(test.vec)
if(scale.est != "MM") {
scale <- if(scale.est == "MAD")
if(is.null(wt)) median(abs(resid))/0.6745 else wmedian(abs(resid), wt)/0.6745 # This is the median absolute deviation from the last fit, not the more standard median absolute deviation from the median.
#else if(is.null(wt)) ## The two lines below are the Huber proposal 2 scale estimate. Why they didn't use the Huber function included in the package elsewhere is beyond me...
# sqrt(sum(pmin(Conj(resid)*resid, Conj(k2 * scale)*(k2 * scale)))/(n1*gamma))
#else sqrt(sum(wt*pmin(Conj(resid)*resid, Conj(k2 * scale)*(k2 * scale)))/(n1*gamma))
else
{
warning("Only MAD scale is supported for complex variables. Continuing with scale.est = \"MAD.\"")
scale <- if(scale.est == "MAD")
if(is.null(wt)) median(abs(resid))/0.6745 else wmedian(abs(resid), wt)/0.6745
}
if(scale == 0) {
done <- TRUE
break
}
}
w <- psi(resid/scale)
#print(length(w)) #debug
if(!is.null(wt)) w <- w * weights
temp <- zlm.wfit(x, y, w, method="qr")
coef <- temp$coefficients
resid <- temp$residuals
if(!is.null(test.vec)) convi <- irls.delta(testpv, get(test.vec))
else convi <- irls.rrxwr(x, w, resid)
conv <- c(conv, convi)
done <- (convi <= acc)
if(done) break
}
if(!done)
warning(gettextf("'rlm' failed to converge in %d steps", maxit),
domain = NA)
fitted <- drop(xx %*% coef)
## fix up call to refer to the generic, but leave arg name as `formula'
cl <- match.call()
cl[[1L]] <- as.name("rlm")
fit <- list(coefficients = coef, residuals = yy - fitted, wresid = resid, # If inv.var weights are used, wresid are pre-weighted, otherwise they are the same as residuals.
effects = temp$effects,
rank = temp$rank, fitted.values = fitted,
assign = temp$assign, qr = temp$qr, df.residual = NA, w = w,
s = scale, psi = psi, k2 = k2,
weights = if(!missing(weights)) weights,
conv = conv, converged = done, x = xx, call = cl)
class(fit) <- c("rzlm", "zlm", "rlm", "lm")
return(fit)
}
else print("ERROR: data must be complex.")
}
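## Minimal IRLS sketch (base R only) of the loop implemented above: rescale by the
## MAD of the residual moduli, reweight with psi.huber, and refit by weighted
## least squares until the coefficients stop changing. The names X, beta, r, s and w
## are local to the sketch; the real function uses zlm.wfit and a configurable
## convergence test. Wrapped in if (FALSE) so it is never run at load.
if (FALSE) {
    set.seed(1)
    n <- 50
    x <- complex(real = rnorm(n), imaginary = rnorm(n))
    y <- (3 + 2i) * x + (1 - 1i) + complex(real = rnorm(n), imaginary = rnorm(n)) / 10
    X <- cbind(1 + 0i, x)                               # design matrix with an intercept column
    beta <- qr.solve(X, y)                              # ordinary least-squares start (the "ls" init)
    r <- y - drop(X %*% beta)
    for (iter in 1:20) {
        s <- median(abs(r)) / 0.6745                    # MAD scale of the residual moduli
        w <- psi.huber(r / s)                           # real weights in (0, 1]
        beta.new <- qr.solve(sqrt(w) * X, sqrt(w) * y)  # weighted complex least squares
        r <- y - drop(X %*% beta.new)
        if (sqrt(sum(Mod(beta.new - beta)^2)) < 1e-6 * sqrt(sum(Mod(beta)^2))) break
        beta <- beta.new
    }
    beta.new                                            # close to c(1-1i, 3+2i)
}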
### I don't think I need a separate print.rlm function for complex variables...
# print.rlm <- function(x, ...)
# {
# if(!is.null(cl <- x$call)) {
# cat("Call:\n")
# dput(cl, control=NULL)
# }
# if(x$converged)
# cat("Converged in", length(x$conv), "iterations\n")
# else cat("Ran", length(x$conv), "iterations without convergence\n")
# coef <- x$coefficients
# cat("\nCoefficients:\n")
# print(coef, ...)
# nobs <- length(x$residuals)
# rdf <- nobs - length(coef)
# cat("\nDegrees of freedom:", nobs, "total;", rdf, "residual\n")
# if(nzchar(mess <- naprint(x$na.action))) cat(" (", mess, ")\n", sep="")
# cat("Scale estimate:", format(signif(x$s,3)), "\n")
# invisible(x)
# }
#' Summary Method for Robust Linear Models
#'
#' Summary method for objects of class "rzlm", capable of processing complex variable fits.
#' If the residuals in the passed object are numeric, this function just calls [MASS::summary.rlm()].
#'
#' @param object An object inheriting from the class 'rzlm'. That is, a fitted model that was generated by [complexlm::rlm].
#' @param method Character string indicating if the IWLS weights should be used when calculating matrix cross-products. "XtX" does not include weights, "XtWX" does.
#' @param correlation Logical. Should the correlations be computed and printed?
#' @param ... Other arguments, passed to or from other methods.
#'
#' @details
#' This is a method for the generic `summary()` function. It is based on the function of the same name from MASS, but has been modified to accept complex numbers.
#' In addition to the standard error of the coefficients, it calculates a "pseudo standard error" or "relational standard error" as the square root of the "pseudo-variance".
#' This is a complex number that quantifies the covariance between the real and imaginary parts. It can also be thought of as the amount and direction of anisotropy in the
#' (presumed complex normal) probability distribution in the complex plane. The argument of this number gives the direction of the semi-major axis.
#'
#' @return
#' Returns a list containing the following elements. If the list is printed by `print.summary.rlm()` (or when the result of a call to `summary()` is auto-printed), a null value is returned instead.
#' \item{`correlation`}{A numeric matrix. The computed correlation coefficient matrix for the coefficients in the model.}
#' \item{`pseudocorrelation`}{A complex matrix. The computed pseudo-correlation coefficient matrix for the coefficients in the model.}
#' \item{`cov.unscaled`}{The unscaled covariance matrix; i.e., a numeric matrix such that multiplying it by an estimate of the error variance produces an estimated covariance matrix for the coefficients.}
#' \item{`pcov.unscaled`}{The unscaled pseudo-covariance matrix; i.e., a complex matrix such that multiplying it by an estimate of the error pseudo-variance produces an estimated pseudo-covariance matrix for the coefficients.}
#' \item{`sigma`}{Numeric. The scale estimate returned by `rlm`, which was used to scale the residuals before passing them to the `psi` weight function.}
#' \item{`stddev`}{Numeric. A scale estimate used for the standard errors. Calculated from the working residuals, the `psi` weight function, and the derivative of the influence function (the `psi` function called with `deriv = 1`).}
#' \item{`pstddev`}{Complex. A scale estimate used for the 'pseudo' standard errors. Calculated from the working residuals, the `psi` weight function, and the derivative of the influence function (the `psi` function called with `deriv = 1`). See details above.}
#' \item{`df`}{The number of degrees of freedom for the model and for residuals.}
#' \item{`coefficients`}{A 4 column matrix that contains the model coefficients, their standard errors, their pseudo standard errors (see details above), and their t statistics.}
#' \item{`terms`}{The terms object used in fitting this model.}
#' @export
#'
#' @references
#' W. N. Venables and B. D. Ripley, Modern Applied Statistics with S, 4th ed (Springer, New York, 2002).
#' P. J. Huber and E. Ronchetti, Robust Statistics, 2nd ed (Wiley, Hoboken, N.J, 2009).
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slope <- complex(real = 4.23, imaginary = 2.323)
#' intercept <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' x <- complex(real = rnorm(n), imaginary = rnorm(n))
#' y <- slope * x + intercept + e
#' robfit <- rlm(x = x, y = y, weights = rep(1,n), interc = TRUE)
#' summary(robfit)
summary.rzlm <- function(object, method = c("XtX", "XtWX"),
correlation = FALSE, ...)
{
method <- match.arg(method)
s <- object$s
p <- object$rank
coef <- object$coefficients[object$qr$pivot[1L:p]] # Pivot coefficients to account for pivoting in qr.
ptotal <- length(coef)
wresid <- object$wresid
res <- object$residuals
n <- length(wresid)
if(any(na <- is.na(coef))) coef <- coef[!na]
cnames <- names(coef)
#p <- length(coef)
rinv <- diag(p)
dimnames(rinv) <- list(cnames, cnames)
wts <- if(length(object$weights)) object$weights else rep(1, n)
if(length(object$call$wt.method) && object$call$wt.method == "case") {
rdf <- sum(wts) - p ## 'residual degrees of freedom'
w <- object$psi(wresid/s)
S <- sum(as.numeric(wts * (wresid*w)*Conj(wresid*w)))/rdf # The estimated variance of the residuals is analogous to the area of a circle around the estimated parameters, so it is a real number.
pS <- sum(wts * (wresid*w)*(wresid*w))/rdf # The estimated pseudo-variance is a complex number that describes the anisotropy of the distribution or set. wresid*w is the influence function
# Distributions that are less circularly symmetric and more bilaterally symmetric have higher pseudovariance.
# The direction of the pseudovariance indicates the orientation of the anisotropy; bilateral symmetry axis angle from
# the +real axis = pseudovariance angle divided by two. In other words, pseudovariance is the covariance between real and imaginary.
psiprime <- object$psi(wresid/s, deriv=1) # This region of code occupies the position that finding the sum of squared deviations from the mean of the fitted values does in summary.lm().
m1 <- sum(wts*psiprime) # What are these for? psiprime will depend on length (size) of residual, and direction. It depends on what psi function was chosen. Huber will have psiprime = 1 Exp[i (phi - pi)] for small residuals and 0 for large ones. A kind of weighted sample 1st moment of psiprime.
m2 <- sum(wts*as.numeric(Conj(psiprime)*psiprime))
pm2 <- sum(wts*psiprime*psiprime) # 'pseudo m2' a complex quantity.
nn <- sum(wts) # "sample size" is sum of the weights, makes sense for case weights I suppose.
mn <- m1/nn # 'mn' is "mean" -> This is the mean psiprime, the average derivative of the influence function for the fit in question.
kappa <- 1 + p*(m2 - as.numeric(Conj(m1)*m1)/nn)/(nn-1)/(nn*as.numeric(Conj(mn)*mn)) #Check this. It resembles the uncertainty of a quantum operator, sigma_x = sqrt(<x^2> - <x>^2). # It almost looks like it's missing a parentheses...
stddev <- sqrt(S)*(kappa/abs(mn))
pkappa <- 1 + p*(pm2 - m1^2/nn)/(nn-1)/(nn*mn^2) # 'pseudo kappa', a complex thing
pstddev <- sqrt(pS)*(pkappa/mn) # The 'pseudo standard deviation', analogous with the pseudo-variance. Might be useful, might be meaningless.
} else {
res <- res * sqrt(wts)
rdf <- n - p ## 'residual degrees of freedom'
w <- object$psi(wresid/s)
S <- sum(as.numeric(wresid*w*Conj(wresid*w)))/rdf # The estimated variance is analogous to the area of a circle around the estimated parameters, so it is a real number.
pS <- sum((wresid*w)*(wresid*w))/rdf # See above definition of pS.
psiprime <- object$psi(wresid/s, deriv=1) # The derivative of the influence function.
mn <- mean(psiprime)
#pvarpsi <- sum((psiprime - mn)^2)/(n-1)
kappa <- 1 + p*(n-1)*var(psiprime)/(n^2*as.numeric(Conj(mn)*mn)) # This has something to do with propagation of uncertainty, I think.
#print(var(psiprime))
#pkappa <- 1 + p*pvarpsi/(n*mn^2)
pkappa <- 1 + p*(n-1)*var(psiprime, pseudo = TRUE)/(n^2*mn^2)
stddev <- sqrt(S)*(kappa/abs(mn)) ## Would it be useful to do something similar with pseudo-variance? Probably.
pstddev <- sqrt(pS)*(pkappa/mn) # The 'pseudo standard deviation', analogous with the pseudo-variance. Might be useful, might be meaningless.
}
X <- if(length(object$weights)) object$x * sqrt(object$weights) else object$x # The model (design) matrix, or the model matrix times the sqrt of the weights.
if(method == "XtWX") { # (X transposed) [weight matrix] (X) #wts are not the IWLS weights, they are the ones given by the user. w are the IWLRS weights.
mn <- sum(wts*w)/sum(wts)
X <- X * sqrt(w/mn)
}
R <- qr(X)$qr
R <- R[1L:p, 1L:p, drop = FALSE] # Trim the bottom of R, making it a square p by p matrix.
R[lower.tri(R)] <- 0 # Remove the lower triangular matrix, the Q from the QR decomposition.
rinv <- solve(R, rinv) # This is efficient, we only need the diagonal matrix diag(p) to get rinv, so just set rinv <- diag(p) ahead of time. Now rinv is a different p by p matrix.
dimnames(rinv) <- list(cnames, cnames)
rowlen <- (abs(Conj(rinv)*rinv) %*% rep(1, p))^0.5 # Produces a real vector of length p. Elements are the sum of the rows of (X^T X)^-1, each square rooted.
prowlen <- (rinv^2 %*% rep(1, p))^0.5 # Produces a complex vector of length p
names(rowlen) <- cnames # cnames are the names of the coefficients.
if(correlation) {
correl <- rinv * array(1/rowlen, c(p, p))
correl <- correl %*% Conj(t(correl))
pcorrel <- rinv * array(1/prowlen, c(p, p))
pcorrel <- pcorrel %*% t(pcorrel)
} else {
correl <- NULL
pcorrel <- NULL
}
coef <- array(coef, c(p, 4L)) # Make an array with 4 columns and p rows. put the coefficients into the first column.
#dimnames(coef) <- list(rev(cnames), c("Value", "Std. Error", "Pseudo Std. Error", "t value")) # The way that list() orders cnames seems to be opposite that of matrix algebra, so we need rev() to assign them properly.
dimnames(coef) <- list(cnames, c("Value", "Std. Error", "Pseudo Std. Error", "t value"))
#print(rinv)
coef[, 2] <- rowlen %o% stddev # Fill the 2nd column of the coef array. These should be real numbers. Isn't stddev a single number? What is the point of the outer product? It transposes the vector into a column while multiplying all elements by stddev.
coef[, 3] <- prowlen %o% pstddev # The 'pseudo - standard error', these things need better names...
coef[, 4] <- coef[, 1]/coef[, 2]
object <- object[c("call", "na.action")]
object$residuals <- res
object$coefficients <- coef
object$sigma <- s
#print(var(psiprime))
#print(stddev)
object$stddev <- stddev
object$pstddev <- pstddev
object$df <- c(p, rdf, ptotal)
object$r.squared <- NA
object$cov.unscaled <- rinv %*% Conj(t(rinv))
object$pcov.unscaled <- rinv %*% t(rinv) # pseudo covariance
object$correlation <- correl
object$pseudocorrelation <- pcorrel
object$terms <- NA
class(object) <- c("summary.rzlm", "summary.zlm", "summary.rlm", "summary.lm")
return(object)
}
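## Sketch of the variance / pseudo-variance distinction used in the summary above.
## For centered complex draws z, the variance mean(Mod(z)^2) is real, while the
## pseudo-variance mean(z^2) is complex; half its argument gives the orientation of
## the anisotropy in the complex plane. Wrapped in if (FALSE) so it is never run at load.
if (FALSE) {
    set.seed(42)
    z <- complex(real = rnorm(1000, sd = 2), imaginary = rnorm(1000, sd = 0.5))
    z <- z * exp(1i * pi / 6)   # an elongated cloud tilted 30 degrees from the real axis
    zc <- z - mean(z)
    mean(Mod(zc)^2)             # ordinary variance: a real number (roughly 4.25)
    mean(zc^2)                  # pseudo-variance: complex, magnitude set by the elongation
    Arg(mean(zc^2)) / 2         # roughly pi/6, the tilt of the cloud's long axis
}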
## This function has been deprecated / merged into print.summary.zlm()
#print.summary.rzlm <- function(x, digits = max(3, .Options$digits - 3), ...)
### NOTE: These psi functions are actually weight functions, weight(u) = abs( influence(u) / u).
### Traditionally the influence functions are given the symbol psi.
### The derivative returned by these functions, when given deriv=1, is the (absolute value of the) derivative of the influence function,
### not the weight function.
### NOTE: The derivative of the complex influence function will be a complex number, and the direction does have meaning.
### Derivative output modified to incorporate the complex nature of the influence function.
#' Weighting functions for robust fitting
#'
#' Weighting functions used in `rlm` for iteratively reweighted least squares.
#' Based on the same functions from MASS, modified to accept complex variables.
#' While named 'psi', these are actually weight functions, weight(u) = abs( influence(u) / u).
#'
#' @param u Numeric or complex. When used in M-estimation, a residual.
#' @param k Numeric. A scaling constant for `psi.huber`. Default value is 1.345.
#' @param a Numeric. A scaling constant for `psi.hampel`. Default value is 2.
#' @param b Numeric. A scaling constant for `psi.hampel`. Default value is 4.
#' @param c Numeric. A scaling constant for `psi.hampel` or `psi.bisquare`. Default is 8 for the former and 4.685 for the latter.
#' @param deriv Numeric. If `0`, return the weight at `u`. If `1` return the first derivative of the influence function at `u`.
#'
#' @details
#' Three weight functions for iteratively (re)weighted least squares. When used in M-estimation, `psi.huber` has
#' a unique solution. `psi.hampel` and `psi.bisquare`, which are redescending M-estimators, do not have a unique solution.
#' These are capable of completely rejecting outliers, but need a good starting point to avoid falling into unhelpful local minima.
#' If `deriv` is set to `1`, the functions return the value of the first derivative of the influence function at `u`.
#' Note that they do not return the derivative of the weight function, as might be expected.
#'
#' @return A numeric or complex vector, containing either the value of the weight function at `u` (numeric) or the first derivative of the influence function at `u` (possibly complex).
#' @export
#'
#' @examples
#' set.seed(4242)
#' z <- complex(real = rnorm(3), imaginary = rnorm(3))
#' psi.huber(z)
#' psi.hampel(z)
#' psi.bisquare(z)
#' psi.huber(z, deriv=1)
#' psi.hampel(z, deriv=1)
psi.huber <- function(u, k = 1.345, deriv=0)
{
if(!deriv) return(pmin(1, k / abs(u)))
dhub <- ifelse(abs(u) <= k, complex(modulus = 1, argument = 0), 0)
return(as.vector(dhub)) # Just in case this comes out as a column vector as well.
}
#' @describeIn psi.huber The weight function of the hampel objective function.
#'
#' @export
psi.hampel <- function(u, a = 2, b = 4, c = 8, deriv=0)
{
U <- pmin(abs(u) + 1e-50, c)
ham <- ifelse(U <= a, 2 * a / b, ifelse(U <= b, a / U, ifelse(U <= c, (a / (b - c)) * (1 - c / U))))
#print(ham)
if(!deriv) return(as.vector(ham)) # It seems that ham often ends up being a column matrix...
#if(!deriv) return(as.vector(ifelse(U <= a, 2 * U * a / b, ifelse(U <= b, a, a * (c - U) / (c - b) )) / U)) # Strange way to write this equation...
dham <- ifelse(abs(u) <= c, ifelse(U <= a, 2 * a / b, ifelse(U <= b, 0, a / (b - c))), 0)
return(as.vector(dham)) # dham also tends to be a column matrix...
}
#' @describeIn psi.huber The weight function of Tukey's bisquare objective function.
#'
#' @export
psi.bisquare <- function(u, c = 4.685, deriv=0)
{
cll <- match.call()
if (!is.complex(u)) {
#warning("dog")
cll[[1]] <- MASS::psi.bisquare
eval(cll, parent.frame())
}
else
{
if(!deriv) return((1 - pmin(1, abs(u/c))^2)^2)
t <- (u/c)
#warning(t)
#warning(abs(t))
ifelse(Mod(t) < 1, (1 - Conj(t)*t) * ( (1 - Conj(t) * t) * complex(modulus = 1, argument = 2 * Arg(t)) - 4 * t^2), 0)
#return('cat')
}
}
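## Sketch of the weight-versus-influence relation described in the note above:
## these functions return weights, and the influence value is recovered as
## u * psi(u). For psi.huber that product has modulus min(|u|, k) and keeps the
## direction of u. Wrapped in if (FALSE) so it is never run at load.
if (FALSE) {
    u <- complex(real = c(0.3, 2, -1), imaginary = c(0.1, -3, 2))
    w <- psi.huber(u, k = 1.345)             # real weights
    infl <- u * w                            # complex influence values
    cbind(Mod(infl), pmin(Mod(u), 1.345))    # the two columns agree
    Arg(infl) - Arg(u)                       # all zero: the direction of u is preserved
}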
# se.contrast.rlm <- # I don't understand this code. Hopefully it works fine with complex numbers as is.
# function(object, contrast.obj,
# coef = contr.helmert(ncol(contrast))[, 1L],
# data = NULL, ...)
# {
# contrast.weight.aov <- function(object, contrast)
# {
# asgn <- object$assign[object$qr$pivot[1L:object$rank]]
# uasgn <- unique(asgn)
# nterms <- length(uasgn)
# nmeffect <- c("(Intercept)",
# attr(object$terms, "term.labels"))[1L + uasgn]
# effects <- as.matrix(qr.qty(object$qr, contrast))
# res <- matrix(0, nrow = nterms, ncol = ncol(effects),
# dimnames = list(nmeffect, colnames(contrast)))
# for(i in seq(nterms)) {
# select <- (asgn == uasgn[i])
# res[i,] <- colSums(effects[seq_along(asgn)[select], , drop = FALSE]^2)
# }
# res
# }
# if(is.null(data)) contrast.obj <- eval(contrast.obj)
# else contrast.obj <- eval(substitute(contrast.obj), data, parent.frame())
# if(!is.matrix(contrast.obj)) { # so a list
# if(!missing(coef)) {
# if(sum(coef) != 0)
# stop("'coef' must define a contrast, i.e., sum to 0")
# if(length(coef) != length(contrast.obj))
# stop("'coef' must have same length as 'contrast.obj'")
# }
# contrast <-
# sapply(contrast.obj, function(x)
# {
# if(!is.logical(x))
# stop(gettextf("each element of '%s' must be logical",
# substitute(contrasts.list)),
# domain = NA)
# x/sum(x)
# })
# if(!length(contrast) || all(is.na(contrast)))
# stop("the contrast defined is empty (has no TRUE elements)")
# contrast <- contrast %*% coef
# } else {
# contrast <- contrast.obj
# if(any(abs(colSums(contrast)) > 1e-8))
# stop("columns of 'contrast.obj' must define a contrast (sum to zero)")
# if(!length(colnames(contrast)))
# colnames(contrast) <- paste("Contrast", seq(ncol(contrast)))
# }
# weights <- contrast.weight.aov(object, contrast)
# summary(object)$stddev *
# if(!is.matrix(contrast.obj)) sqrt(sum(weights)) else sqrt(colSums(weights))
# }
## I don't think this function is actually necessary, predict.rlm() should work fine
## Indeed it does. This function / method is now deprecated.
# @describeIn rlm Predict new data based on the model in object. Invokes `predict.lm`.
# @param object a `rlm` object; a fit from which you wish to predict new data.
# @param newdata new predictor data from which to predict response data. Default is NULL.
# @param scale this seems to be ignored. Default is NULL.
# @param ... further arguments to be passed to NextMethod().
#
# @export
#predict.rzlm <- function (object, newdata = NULL, scale = NULL, ...)
#{
## problems with using predict.lm are the scale and
## the QR decomp which has been done on down-weighted values.
## Only works if explicit weights are given during the call that produced object..?
# object$qr <- qr(sqrt(object$weights) * object$x)
#print('cats') # For debugging.
# NextMethod(object, scale = object$s, ...) # So this just calls predict.lm on the object.
#}
#' Calculate Variance-Covariance Matrix and Pseudo Variance-Covariance Matrix for a Complex Fitted Model Object
#'
#' A version of [stats::vcov] that is compatible with complex linear models. In addition to the variance-covariance matrix,
#' the pseudo variance-covariance matrix, which is a measure of the covariance between real and imaginary components, is returned as well.
#' Can also return the "big covariance" matrix, which combines the information of the covariance matrix and the pseudo-covariance matrix, as described in (van den Bos 1995).
#' While not as compact as two separate smaller matrices, the big covariance matrix simplifies calculations such as the [mahalanobis] distance.
#'
#' @param object a fitted model object, typically. Sometimes also a summary() object of such a fitted model.
#' @param ... Additional parameters, not currently used for anything.
#' @param merge logical. Should the covariance matrix and pseudo-covariance / relational matrix be merged into one matrix of twice the dimensions? Default is TRUE.
#'
#' @return
#' If `merge` is false, a list containing both the numeric variance-covariance matrix, and the complex pseudo variance-covariance matrix.
#' If `merge` is true, a large matrix (both dimensions twice the number of coefficients) containing both the variance-covariance matrix and the pseudo variance-covariance matrix, merged together.
#' @export
#'
#' @references A. van den Bos, The Multivariate Complex Normal Distribution-a Generalization, IEEE Trans. Inform. Theory 41, 537 (1995).
#'
#' @examples
#' set.seed(4242)
#' n <- 8
#' slope <- complex(real = 4.23, imaginary = 2.323)
#' intercept <- complex(real = 1.4, imaginary = 1.804)
#' e <- complex(real=rnorm(n)/6, imaginary=rnorm(n)/6)
#' x <- complex(real = rnorm(n), imaginary = rnorm(n))
#' y <- slope * x + intercept + e
#' robfit <- rlm(x = x, y = y, weights = rep(1,n), interc = TRUE)
#' summary(robfit)$stddev
#' vcov(robfit)
vcov.rzlm <- function(object, merge = TRUE, ...)
{
so <- summary(object, corr = FALSE)
#print(dim(so$stddev^2)) #Debugging help
#print(dim(so$cov.unscaled)) #For debugging
varcovar <- so$stddev^2 * so$cov.unscaled
pseudovarcovar <- so$pstddev^2 * so$pcov.unscaled
if (merge == TRUE)
{
#bigcovar <- diag(rep(diag(varcovar), each = 2)) # Start by making a square diagonal matrix with two adjacent diagonal elements for each variance.
n <- attributes(varcovar)$dim[1] # The size of the square covariance matrix.
bigcovar <- matrix(0, ncol = 2*n, nrow = 2*n) #Start by making an empty matrix with dimensions twice those of the small covariance matrix.
bigcovar[,seq(1, 2*n, 2)] <- matrix(as.vector(rbind(as.vector(varcovar), as.vector(Conj(pseudovarcovar)))), nrow = 2*n) # Fill the odd indexed columns of bigcovar with the entries from varcovar interleaved with the entries from pseudovarcovar conjugated.
bigcovar[,seq(2, 2*n, 2)] <- matrix(as.vector(rbind(as.vector(pseudovarcovar), as.vector(varcovar))), nrow = 2*n) # Fill the even indexed columns of bigcovar with the entries from varcovar interleaved with the entries from pseudovarcovar, this time the later first.
return(bigcovar)
}
else return(list(varcovar = as.numeric(varcovar), pseudovarcovar = pseudovarcovar))
}
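## Sketch of the layout of the merged ("big") covariance matrix returned above:
## the rows and columns come in pairs, and the 2 x 2 block for coefficients i and j is
##   [ V[i, j]        P[i, j] ]
##   [ Conj(P[i, j])  V[i, j] ]
## with V the covariance and P the pseudo-covariance (van den Bos 1995).
## Wrapped in if (FALSE) so it is never run at load.
if (FALSE) {
    set.seed(4242)
    n <- 8
    x <- complex(real = rnorm(n), imaginary = rnorm(n))
    y <- (4.23 + 2.323i) * x + (1.4 + 1.804i) + complex(real = rnorm(n), imaginary = rnorm(n)) / 6
    fit <- rlm(x = x, y = y, weights = rep(1, n), interc = TRUE)
    big <- vcov(fit)                # 4 x 4: two coefficients, two rows/columns each
    big[1, 1] - big[2, 2]           # ~0: the variance of a coefficient fills both diagonal slots of its block
    big[2, 1] - Conj(big[1, 2])     # ~0: the off-diagonal entries of a block are conjugates
}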