Nothing
# ----------------------------------------------------------
# Authors: Andreas Alfons, Bernd Prantner, Matthias Templ
# and Daniel Schopfhauser
# Vienna University of Technology
# ----------------------------------------------------------
#' Scatterplot matrix with information about missing/imputed values
#'
#' Scatterplot matrix in which observations with missing/imputed values in
#' certain variables are highlighted.
#'
#' `scattmatrixMiss` uses [pairsVIM()] with a panel function
#' that allows highlighting of missing/imputed values.
#'
#' If `interactive=TRUE`, the variables to be used for highlighting can be
#' selected interactively. Observations with missing/imputed values in any or
#' in all of the selected variables are highlighted (as determined by
#' `selection`). A variable can be added to the selection by clicking in
#' a diagonal panel. If a variable is already selected, clicking on the
#' corresponding diagonal panel removes it from the selection. Clicking
#' anywhere else quits the interactive session.
#'
#' The graphical parameter `oma` will be set unless supplied as an
#' argument.
#'
#' `TKRscattmatrixMiss` behaves like `scattmatrixMiss`, but uses
#' tkrplot to embed the plot in a *Tcl/Tk* window.
#' This is useful if the number of variables is large, because scrollbars allow
#' to move from one part of the plot to another.
#'
#' @param x a matrix or `data.frame`.
#' @param delimiter a character-vector to distinguish between variables and
#' imputation-indices for imputed variables (therefore, `x` needs to have
#' [colnames()]). If given, it is used to determine the corresponding
#' imputation-index for any imputed variable (a logical-vector indicating which
#' values of the variable have been imputed). If such imputation-indices are
#' found, they are used for highlighting and the colors are adjusted according
#' to the given colors for imputed variables (see `col`).
#' @param highlight a vector giving the variables to be used for highlighting.
#' If `NULL` (the default), all variables are used for highlighting.
#' @param selection the selection method for highlighting missing/imputed
#' values in multiple highlight variables. Possible values are `"any"`
#' (highlighting of missing/imputed values in *any* of the highlight
#' variables) and `"all"` (highlighting of missing/imputed values in
#' *all* of the highlight variables).
#' @param plotvars a vector giving the variables to be plotted. If `NULL`
#' (the default), all variables are plotted.
#' @param col a vector of length three giving the colors to be used in the
#' plot. The second/third color will be used for highlighting missing/imputed
#' values.
#' @param alpha a numeric value between 0 and 1 giving the level of
#' transparency of the colors, or `NULL`. This can be used to prevent
#' overplotting.
#' @param pch a vector of length two giving the plot characters. The second
#' plot character will be used for the highlighted observations.
#' @param lty a vector of length two giving the line types for the density
#' plots in the diagonal panels (if `diagonal="density"`). The second
#' line type is used for the highlighted observations. If a single value is
#' supplied, it is used for both non-highlighted and highlighted observations.
#' @param diagonal a character string specifying the plot to be drawn in the
#' diagonal panels. Possible values are `"density"` (density plots for
#' non-highlighted and highlighted observations) and `"none"`.
#' @param interactive a logical indicating whether the variables to be used for
#' highlighting can be selected interactively (see \sQuote{Details}).
#' @param \dots for `scattmatrixMiss`, further arguments and graphical
#' parameters to be passed to [pairsVIM()]. `par("oma")` will
#' be set appropriately unless supplied (see [graphics::par()]). For
#' `TKRscattmatrixMiss`, further arguments to be passed to
#' `scattmatrixMiss`.
#' @note Some of the argument names and positions have changed with version 1.3
#' due to a re-implementation and for more consistency with other plot
#' functions in `VIM`. For back compatibility, the argument
#' `colcomb` can still be supplied to \code{\dots{}} and is handled
#' correctly. Nevertheless, it is deprecated and no longer documented. Use
#' `highlight` instead. The arguments `smooth`, `reg.line` and
#' `legend.plot` are no longer used and ignored if supplied.
#' @author Andreas Alfons, Matthias Templ, modifications by Bernd Prantner
#' @seealso [pairsVIM()], [marginmatrix()]
#' @references M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete
#' data using visualization tools. *Journal of Advances in Data Analysis
#' and Classification*, Online first. DOI: 10.1007/s11634-011-0102-y.
#' @keywords hplot
#' @family plotting functions
#' @examples
#'
#' data(sleep, package = "VIM")
#' ## for missing values
#' x <- sleep[, 1:5]
#' x[,c(1,2,4)] <- log10(x[,c(1,2,4)])
#' scattmatrixMiss(x, highlight = "Dream")
#'
#' ## for imputed values
#' x_imp <- kNN(sleep[, 1:5])
#' x_imp[,c(1,2,4)] <- log10(x_imp[,c(1,2,4)])
#' scattmatrixMiss(x_imp, delimiter = "_imp", highlight = "Dream")
#'
#' @export
scattmatrixMiss <- function(x, delimiter = NULL, highlight = NULL,
selection = c("any","all"), plotvars = NULL,
col = c("skyblue","red","orange"), alpha = NULL,
pch = c(1,3), lty = par("lty"),
diagonal = c("density","none"),
interactive = TRUE, ...) {
check_data(x)
x <- as.data.frame(x)
# initializations and error messages
imputed <- FALSE # indicates if there are Variables with missing-index
## delimiter ##
if(!is.null(delimiter)) {
tmp <- grep(delimiter, colnames(x)) # Position of the missing-index
if(length(tmp) > 0) {
imp_var <- x[, tmp, drop=FALSE]
x <- x[, -tmp, drop=FALSE]
if(ncol(x) == 0) stop("Only the missing-index is given")
if(is.matrix(imp_var) && range(imp_var) == c(0,1)) imp_var <- apply(imp_var,2,as.logical)
if(is.null(dim(imp_var))) {
if(!is.logical(imp_var)) stop("The missing-index of imputed Variables must be of the type logical")
} else {
if(!any(as.logical(lapply(imp_var,is.logical)))) stop("The missing-index of imputed Variables must be of the type logical")
}
imputed <- TRUE
} else {
warning("'delimiter' is given, but no missing-index-Variable is found", call. = FALSE)
}
}
px <- ncol(x)
if(is.null(colnames(x))) colnames(x) <- defaultNames(px)
if(length(highlight) > px) stop("'highlight' is too long")
if(length(plotvars) > px) stop("'plotvars' is too long")
z <- if(is.null(plotvars)) x else x[, plotvars, drop=FALSE]
pz <- ncol(z)
if(pz < 2) stop("the data to be plotted must be at least 2-dimensional")
selection <- match.arg(selection)
if(length(col) == 0) col <- c("skyblue","red","orange")
if(length(pch) == 0) pch <- c(1,3)
if(length(col) == 1 && length(pch) == 1) {
stop("same color and plot symbol for observed and missing values")
}
if(length(col) == 1) col <- rep(col, 3)
else if(length(col) == 2) col <- rep(col,1:2)
else if(length(col) > 3) col <- col[1:3]
if(length(pch) == 1) pch <- rep(pch, 2)
else if(length(pch) > 2) pch <- pch[1:2]
# semitransparent colors
colalpha <- alphablend(col, alpha)
# prepare data
if(is.data.frame(z)) z <- data.matrix(z)
else if(mode(z) != "numeric") mode(z) <- "numeric"
# diagonal panel
diagonal <- match.arg(diagonal)
if(diagonal == "density") {
if(length(lty) == 0) lty <- par("lty")
if(length(lty) == 1) {
if(col[1] == col[2]) {
stop("same color and line type for observed and missing values")
} else if(col[1] == col[3]) {
stop("same color and line type for observed and imputed values")
}
lty <- rep.int(lty, 2)
}
else if(length(lty) > 2) lty <- lty[1:2]
}
# initialize call
localPairs <- function(..., colcomb, smooth, reg.line, legend.plot) {
pairsVIM(...)
}
ca <- as.call(list(localPairs, ...))
# back compatibility
if(missing(highlight) && !is.null(ca$colcomb)) {
if(length(ca$colcomb) && ca$colcomb[1] == "missnonmiss") {
highlight <- NULL
} else highlight <- ca$colcomb
}
if(interactive) {
# 'gap', 'oma' and 'layout' are needed later on
if(is.null(ca$oma)) {
# only 'oma' is used by 'pairsVIM' for outer margins
ca$oma <- rep.int(4, 4)
if(!is.null(ca$main)) ca$oma[3] <- 6
if(!is.null(ca$sub)) ca$oma[1] <- 5
}
if(is.null(ca$gap)) ca$gap <- 1
if(is.null(ca$layout)) ca$layout <- "matrix"
}
createPlot <- function() {
# find observations with missings
if(is.null(highlight)) {
if(!imputed) NAvec <- isNA(x, selection)
else NAvec <- isImp(x, pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
}
else {
if(!imputed) NAvec <- isNA(x[, highlight], selection)
else NAvec <- isImp(x[, highlight, drop=FALSE], pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
}
# panel functions
panel.miss <- function(x, y, ...) {
if(!imputed) {
miss <- NULL
color <- colalpha[2]
} else {
tmp <- isImp(cbind(x,y), pos = 1, delimiter = delimiter, imp_var = imp_var, selection = "none")
miss <- cbind(tmp[["misspos"]],tmp[["missh"]])
color <- colalpha[3]
}
xOK <- x[!NAvec, ,drop = FALSE]
yOK <- y[!NAvec, ,drop = FALSE]
points(xOK, yOK, col=colalpha[1], pch=pch[1], ...)
rugNA(xOK, yOK, miss = miss[!NAvec,], side=1, col=colalpha[1])
rugNA(xOK, yOK, miss = miss[!NAvec,], side=2, col=colalpha[1])
xNA <- x[NAvec]
yNA <- y[NAvec]
points(xNA, yNA, col=color, pch=pch[2], ...)
rugNA(xNA, yNA, miss = miss[NAvec,], side=1, col=color)
rugNA(xNA, yNA, miss = miss[NAvec,], side=2, col=color)
}
panel.density <- function(x, ...) {
if(!all(is.na(x))) {
xobs <- x[!NAvec]
xobs <- xobs[is.finite(xobs)]
xNA <- x[NAvec]
xNA <- xNA[is.finite(xNA)]
rx <- range(x, finite=TRUE)
if(par("ylog")) {
# y-axis should not be on logarithmic scale
localPlot <- function(..., type, log, main, sub, xlab, ylab,
ann, axes, frame.plot, panel.first, panel.last) {
par(new=TRUE)
log <- if(par("xlog")) "x" else ""
plot(..., type="n", log=log, ann=FALSE, axes=FALSE)
}
localPlot(rx, rx, ...)
}
if(length(xobs)) dobs <- density(xobs, from=rx[1], to=rx[2])
else dobs <- list(x=NULL, y=NULL)
if(length(xNA)) dNA <- density(xNA, from=rx[1], to=rx[2])
else dNA <- list(x=NULL, y=NULL)
dy <- c(dobs$y, dNA$y) # y-values of both densities
if(length(dy)) { # cannot compute maximum otherwise
mdy <- max(c(dobs$y, dNA$y))
lines(dobs$x, rx[1]+dobs$y*diff(rx)/mdy,
col=col[1], lty=lty[1],
...)
if(!imputed) color <- col[2]
else color <- col[3]
lines(dNA$x, rx[1]+dNA$y*diff(rx)/mdy,
col=color, lty=lty[2],
...)
}
}
}
# panel.text <- function(x, y, txt, cex, font, ...) {
# usr <- par("usr")
# txt.width <- strwidth(txt, cex=cex, font=font, ...)
# txt.height <- strheight(txt, cex=cex, font=font, ...)
# if(txt.width < diff(usr[1:2]) && txt.height < diff(usr[3:4])) {
# text(x, y, txt, cex = cex, font = font, ...)
# }
# }
ca$x <- z
ca$panel <- panel.miss
ca$lower <- NULL
ca$upper <- NULL
ca$diagonal <- switch(diagonal, density=panel.density, none=NULL)
# ca$text.panel <- panel.text
eval(ca)
}
createPlot()
# check for interactive graphics device
dev <- names(dev.cur())
interactiveDevices <- c("X11","quartz","windows")
if(interactive && any(!is.na(charmatch(interactiveDevices, dev)))) {
cat(paste("\nClick in a diagonal panel to add to",
"or remove from the highlight selection.\n"))
cat(paste("To regain use of the VIM GUI and the R console,",
"click anywhere else in the graphics window.\n\n"))
# retrieve geometry of graphics device
gap <- ca$gap
oma <- ca$oma
rf <- if(pz == 2) 5/6 else 2/3 # reduction factor for line height
op <- par(mar=oma*rf, usr=c(0,1,0,1))
on.exit(par(op))
xcenter <- seq(from=1/(2*pz), by=1/pz, length.out=pz)
ycenter <- if(ca$layout == "matrix") rev(xcenter) else xcenter
cxy <- par("cxy") # cxy[2] gives the line height of the graphics device
lxy <- 1/(2*pz) - gap*cxy[2]*rf/2 # half side length of the panels
xleft <- xcenter - lxy
ybottom <- ycenter - lxy
xright <- xcenter + lxy
ytop <- ycenter + lxy
# initializations for selection
cn <- colnames(x)
if(is.null(highlight)) highlight <- cn
else if(!is.character(highlight)) highlight <- cn[highlight]
plotvars <- colnames(z)
# start interactive session
highlightInfo(highlight, selection, imputed) # print out current selection
pt <- locatorVIM(error=TRUE)
while(!is.null(pt) && class(pt) != "try-error") {
i <- which(pt$y > ybottom & pt$y < ytop)
j <- which(pt$x > xleft & pt$x < xright)
# # can't happen since margins must be non-negative
# if(length(i) > 0) i <- i[1]
# if(length(j) > 0) j <- j[1]
if(length(i) && length(j) && i == j) {
highlight <-
if(plotvars[i] %in% highlight)
setdiff(highlight, plotvars[i])
else c(highlight, plotvars[i])
createPlot()
highlightInfo(highlight, selection, imputed) # print out current selection
# # make sure user coordinate system remains as set up above
# # before locator is called again (pairs seems to leave user
# # coordinates in an uncontrollable state if 'row1attop=FALSE')
# par(mar=oma*rf, usr=c(0,1,0,1))
pt <- locatorVIM(error=TRUE)
}
else pt <- NULL
}
if(inherits(pt, "try-error")) on.exit()
}
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.