R/cqplot.R

Defines functions cqplot.default cqplot.mlm `cqplot`

Documented in cqplot.default cqplot.mlm

# plot.mlm:  diagnostic plots for a MLM
# 1: cqplot:  chisq QQ plot
# 2: mvinfluence plot?
# see: http://svitsrv25.epfl.ch/R-doc/library/snpMatrix/html/qq.chisq.html



#' Chi Square Quantile-Quantile plots
#' 
#' A chi square quantile-quantile plots show the relationship between
#' data-based values which should be distributed as \eqn{\chi^2} and
#' corresponding quantiles from the \eqn{\chi^2} distribution.  In multivariate
#' analyses, this is often used both to assess multivariate normality and check
#' for outliers, using the Mahalanobis squared distances (\eqn{D^2}) of
#' observations from the centroid.
#' 
#' \code{cqplot} is a more general version of similar functions in other
#' packages that produce chi square QQ plots. It allows for classical
#' Mahalanobis squared distances as well as robust estimates based on the MVE
#' and MCD; it provides an approximate confidence (concentration) envelope
#' around the line of unit slope, a detrended version, where the reference line
#' is horizontal, the ability to identify or label unusual points, and other
#' graphical features.
#' 
#' The method for \code{"mlm"} objects applies this to the residuals from the
#' model.
#' 
#' The calculation of the confidence envelope follows that used in the SAS
#' program, \url{http://www.datavis.ca/sasmac/cqplot.html} which comes from
#' Chambers et al. (1983), Section 6.8.
#' 
#' The essential formula is 
#' \deqn{ SE ( z_{(i)} ) = \hat{\delta} /g ( q_i)) \times \sqrt{  p_i (1-p_i) / n } }
#' where \eqn{z_{(i)}} is the i-th
#' order value of \eqn{D^2}, \eqn{\hat{\delta}} is an estimate of the slope of
#' the reference line obtained from the corresponding quartiles and
#' \eqn{g(q_i)} is the density of the chi square distribution at the quantile
#' \eqn{q_i}.
#' 
#' Note that this confidence envelope applies only to the \eqn{D^2} computed
#' using the classical estimates of location and scatter. The
#' \code{car::qqPlot()} function provides for simulated envelopes, but only for
#' a univariate measure. Oldford (2016) provides a general theory and methods
#' for QQ plots.
#' 
#' @aliases cqplot cqplot.default cqplot.mlm
#' @param x either a numeric data frame or matrix for the default method, or an
#'          object of class \code{"mlm"} representing a multivariate linear model.  In
#'          the latter case, residuals from the model are plotted.
#' @param \dots Other arguments passed to methods
#' @param method estimation method used for center and covariance, one of:
#'               \code{"classical"} (product-moment), \code{"mcd"} (minimum covariance
#'               determinant), or \code{"mve"} (minimum volume ellipsoid).
#' @param detrend logical; if \code{FALSE}, the plot shows values of \eqn{D^2}
#'                vs. \eqn{\chi^2}. if \code{TRUE}, the ordinate shows values of \eqn{D^2 -
#' \chi^2}
#' @param pch plot symbol for points. Can be a vector of length equal to the
#'            number of rows in \code{x}.
#' @param col color for points. Can be a vector of length equal to the
#'            number of rows in \code{x}.
#'            The default is the \emph{first} entry in the
#'            current color palette (see \code{\link[grDevices]{palette}} and
#'            \code{\link[graphics]{par}}.
#' @param cex character symbol size for points.  Can be a vector of length
#'            equal to the number of rows in \code{x}.
#' @param ref.col Color for the reference line
#' @param ref.lwd Line width for the reference line
#' @param conf confidence coverage for the approximate confidence envelope
#' @param env.col line color for the boundary of the confidence envelope
#' @param env.lwd line width for the confidence envelope
#' @param env.lty line type for the confidence envelope
#' @param env.fill logical; should the confidence envelope be filled?
#' @param fill.alpha transparency value for \code{fill.color}
#' @param fill.color color used to fill the confidence envelope
#' @param labels vector of text strings to be used to identify points, defaults
#'            to \code{rownames(x)} or observation numbers if \code{rownames(x)} is
#'            \code{NULL}
#' @param id.n number of points labeled. If \code{id.n=0}, the default, no
#'             point identification occurs.
#' @param id.method point identification method. The default
#'             \code{id.method="y"} will identify the \code{id.n} points with the largest
#'             value of abs(y-mean(y)). See \code{\link[car]{showLabels}} for other
#'             options.
#' @param id.cex size of text for point labels
#' @param id.col color for point labels
#' @param xlab label for horizontal (theoretical quantiles) axis
#' @param ylab label for vertical (empirical quantiles) axis
#' @param main plot title
#' @param what the name of the object plotted; used in the construction of
#'             \code{main} when that is not specified.
#' @param ylim limits for vertical axis.  If not specified, the range of the
#'             confidence envelope is used.
#' @return Returns invisibly the vector of squared Mahalanobis distances
#'             corresponding to the rows of \code{x} or the residuals of the model for the identified points, else \code{NULL}
#' @author Michael Friendly
#' @seealso \code{\link{Mahalanobis}} for calculation of Mahalanobis squared distance;
#' 
#' \code{\link[stats]{qqplot}}; \code{\link[car]{qqPlot}} can give a similar
#'          result for Mahalanobis squared distances of data or residuals;
#'          \code{\link[qqtest]{qqtest}} has many features for all types of QQ plots.
#' @references 
#' J. Chambers, W. S. Cleveland, B. Kleiner, P. A. Tukey (1983).
#' \emph{Graphical methods for data analysis}, Wadsworth.
#' 
#' R. W. Oldford (2016), "Self calibrating quantile-quantile plots", 
#' \emph{The American Statistician}, 70, 74-90.
#' @keywords hplot
#' @examples
#' 
#' 
#' cqplot(iris[, 1:4])
#' 
#' iris.mod <- lm(as.matrix(iris[,1:4]) ~ Species, data=iris)
#' cqplot(iris.mod, id.n=3)
#' 
#' # compare with car::qqPlot
#' car::qqPlot(Mahalanobis(iris[, 1:4]), dist="chisq", df=4)
#' 
#' 
#' # Adopted data
#' Adopted.mod <- lm(cbind(Age2IQ, Age4IQ, Age8IQ, Age13IQ) ~ AMED + BMIQ, 
#'                   data=Adopted)
#' cqplot(Adopted.mod, id.n=3)
#' cqplot(Adopted.mod, id.n=3, method="mve")
#' 
#' 
#' # Sake data
#' Sake.mod <- lm(cbind(taste, smell) ~ ., data=Sake)
#' cqplot(Sake.mod)
#' cqplot(Sake.mod, method="mve", id.n=2)
#' 
#' # SocialCog data -- one extreme outlier
#' data(SocialCog)
#' SC.mlm <-  lm(cbind(MgeEmotions,ToM, ExtBias, PersBias) ~ Dx,
#'                data=SocialCog)
#' cqplot(SC.mlm, id.n=1)
#' 
#' # data frame example: stackloss data
#' data(stackloss)
#' cqplot(stackloss[, 1:3], id.n=4)                # very strange
#' cqplot(stackloss[, 1:3], id.n=4, detrend=TRUE)
#' cqplot(stackloss[, 1:3], id.n=4, method="mve")
#' cqplot(stackloss[, 1:3], id.n=4, method="mcd")
#' 
#' 
#' 
#' @export cqplot
`cqplot` <-
		function(x, ...) UseMethod("cqplot")

#' @rdname cqplot
#' @exportS3Method cqplot mlm
cqplot.mlm <-
		function(x, ...) {
		
		resids <- residuals(x)
		cqplot.default(resids, what=deparse(substitute(x)), ...)
}


#' @rdname cqplot
#' @exportS3Method cqplot default
cqplot.default <- 
		function(x,
			method=c("classical", "mcd", "mve"), 
			detrend=FALSE, 
			pch=19, col = palette()[1], cex = par("cex"),
			ref.col="red", ref.lwd=2,
			conf = 0.95,
			env.col="gray", env.lwd=2, env.lty=1, env.fill=TRUE,  
			fill.alpha=0.2,
			fill.color=trans.colors(ref.col, fill.alpha),
			labels = if (!is.null(rownames(x))) rownames(x) else 1:nrow(x),
			id.n, id.method="y", id.cex=1, id.col = palette()[1],
			xlab, ylab, 
			main, 
			what=deparse(substitute(x)), 
			ylim, ...) {

  # Function to shade concentration band  
  shade <- function(x1, y1, x2, y2, col) {
    n <- length(x2)
    polygon(c(x1, x2[n:1]), c(y1, y2[n:1]), border=NA, col=col)
  }

	df <- ncol(x)
	data <- as.matrix(x)
	OK <- complete.cases(x)

  dsq <- Mahalanobis(data, method=method)
  ord <- order(dsq[OK])
	dsq.y <- dsq[OK][ord]
	labs <- labels[OK][ord]

  # allow col, pch, and cex to be vectors
	if (length(col) == nrow(x)) 
	  col <- col[OK][ord]
	if (length(pch) == nrow(x)) 
	  pch <- pch[OK][ord]
	if (length(cex) == nrow(x)) 
	  cex <- cex[OK][ord]
	
	n <- length(ord)
  p <- ppoints(n)
  chi2q <- qchisq(p, df)

  # approximate confidence envelope
  g <- (chi2q ^(-1+df/2)) / (exp(chi2q/2) * gamma(df/2) * (sqrt(2)^df))
  scale <- IQR(dsq, na.rm=TRUE) / diff(qchisq(c(.25, .75), df))
  se <- (scale/g) * sqrt( p * (1-p) /n)

	semult <- qnorm(1 - (1 - conf)/2)
  lower <- if (detrend) - semult * se else chi2q - semult * se
  upper <- if (detrend)   semult * se else chi2q + semult * se

  if (missing(xlab)) 
      xlab <- bquote(~ chi[.(df)]^2 ~ " Quantile" )

  if (missing(ylab)) 
      ylab <- paste( if (detrend) "Centered" else "",
                    "Squared Mahalanobis Distance" )
	if (missing(main))
			main <- paste( if (detrend) "Detrended" else "",
			 "Chi-Square Q-Q Plot of", what)
			
	if (missing(ylim))
			ylim <- if(detrend) c(min(lower), max(upper))
							else c(0, max(dsq, upper))

	y <- if (detrend) dsq.y - chi2q else dsq.y
  plot(chi2q, y, type="n",
		main = main, 
    ylab = ylab, 
    xlab = xlab,
    ylim = ylim,  ...)
  if (env.fill) shade(chi2q, upper, chi2q, lower, col=fill.color )
  lines(chi2q, lower, col=env.col, lwd=env.lwd, lty=env.lty)
  lines(chi2q, upper, col=env.col, lwd=env.lwd, lty=env.lty)
  points(chi2q, y, pch = pch, col=col, cex=cex)
  
  abline(0, if (detrend) 0 else 1, lwd = ref.lwd, col = ref.col)
  if (!missing(id.n)) {
  	noteworthy <- car::showLabels(chi2q, y, labels=labs, id.method=id.method, 
  		id.n=id.n,  id.cex = id.cex, id.col = id.col)
  	
  	res <- data.frame(dsq[noteworthy])
  	rownames(res) <- labs[noteworthy]
  	return(invisible(res))
  }
  else return(invisible(NULL))

}
friendly/heplots documentation built on March 8, 2024, 3:39 p.m.