Nothing
# ---------------------------------------
# Author: Andreas Alfons, Bernd Prantner
# and Daniel Schopfhauser
# Vienna University of Technology
# ---------------------------------------
#' Growing dot map with information about missing/imputed values
#'
#' Map with dots whose sizes correspond to the values in a certain variable.
#' Observations with missing/imputed values in additional variables are
#' highlighted.
#'
#' The smallest dots correspond to the 10\% quantile and the largest dots to
#' the 99\% quantile. In between, the dots grow exponentially, with `exp`
#' defining the shape of the exponential function. Missings/imputed missings
#' in the variable of interest will be drawn as rectangles.
#'
#' If `interactive=TRUE`, detailed information for an observation can be
#' printed on the console by clicking on the corresponding point. Clicking in
#' a region that does not contain any points quits the interactive session.
#'
#' @aliases growdotMiss bubbleMiss
#' @param x a vector, matrix or `data.frame`.
#' @param coords a matrix or `data.frame` with two columns giving the
#' spatial coordinates of the observations.
#' @param map a background map to be passed to [bgmap()].
#' @param pos a numeric value giving the index of the variable determining the
#' dot sizes.
#' @param delimiter a character-vector to distinguish between variables and
#' imputation-indices for imputed variables (therefore, `x` needs to have
#' [colnames()]). If given, it is used to determine the corresponding
#' imputation-index for any imputed variable (a logical-vector indicating which
#' values of the variable have been imputed). If such imputation-indices are
#' found, they are used for highlighting and the colors are adjusted according
#' to the given colors for imputed variables (see `col`).
#' @param selection the selection method for highlighting missing/imputed
#' values in multiple additional variables. Possible values are `"any"`
#' (highlighting of missing/imputed values in *any* of the additional
#' variables) and `"all"` (highlighting of missing/imputed values in
#' *all* of the additional variables).
#' @param log a logical indicating whether the variable given by `pos`
#' should be log-transformed.
#' @param col a vector of length six giving the colors to be used in the plot.
#' If only one color is supplied, it is used for the borders of non-highlighted
#' dots and the surface area of highlighted dots. Else if two colors are
#' supplied, they are recycled.
#' @param border a vector of length four giving the colors to be used for the
#' borders of the growing dots. Use `NA` to omit borders.
#' @param alpha a numeric value between 0 and 1 giving the level of
#' transparency of the colors, or `NULL`. This can be used to prevent
#' overplotting.
#' @param scale scaling factor of the map.
#' @param size a vector of length two giving the sizes for the smallest and
#' largest dots.
#' @param exp a vector of length three giving the factors that define the shape
#' of the exponential function (see \sQuote{Details}).
#' @param col.map the color to be used for the background map.
#' @param legend a logical indicating whether a legend should be plotted.
#' @param legtitle the title for the legend.
#' @param cex.legtitle the character expansion factor to be used for the title
#' of the legend.
#' @param cex.legtext the character expansion factor to be used in the legend.
#' @param ncircles the number of circles displayed in the legend.
#' @param ndigits the number of digits displayed in the legend. Note that \
#' this is just a suggestion (see [format()]).
#' @param interactive a logical indicating whether information about certain
#' observations can be displayed interactively (see \sQuote{Details}).
#' @param \dots for `growdotMiss`, further arguments and graphical
#' parameters to be passed to [bgmap()]. For `bubbleMiss`, the
#' arguments to be passed to `growdotMiss`.
#' @note The function was renamed to `growdotMiss` in version 1.3.
#' `bubbleMiss` is a (deprecated) wrapper for `growdotMiss` for back
#' compatibility with older versions. However, due to extended functionality,
#' some of the argument positions have changed.
#'
#' The code is based on (removed from CRAN) bubbleFIN from package
#' StatDA.
#' @author Andreas Alfons, Matthias Templ, Peter Filzmoser, Bernd Prantner
#' @seealso [bgmap()], [mapMiss()],
#' [colormapMiss()]
#' @references M. Templ, A. Alfons, P. Filzmoser (2012) Exploring incomplete
#' data using visualization tools. *Journal of Advances in Data Analysis
#' and Classification*, Online first. DOI: 10.1007/s11634-011-0102-y.
#' @keywords hplot
#' @examples
#'
#' data(chorizonDL, package = "VIM")
#' data(kola.background, package = "VIM")
#' coo <- chorizonDL[, c("XCOO", "YCOO")]
#' ## for missing values
#' x <- chorizonDL[, c("Ca","As", "Bi")]
#' growdotMiss(x, coo, kola.background, border = "white")
#'
#' ## for imputed values
#' x_imp <- kNN(chorizonDL[,c("Ca","As","Bi" )])
#' growdotMiss(x_imp, coo, kola.background, delimiter = "_imp", border = "white")
#'
#' @export
growdotMiss <- function(x, coords, map, pos=1, delimiter = NULL, selection = c("any","all"),
log = FALSE, col = c("skyblue", "red", "skyblue4", "red4", "orange", "orange4"),
border = par("bg"), alpha = NULL, scale = NULL,
size = NULL, exp = c(0, 0.95, 0.05),
col.map = grey(0.5), legend = TRUE,
legtitle = "Legend", cex.legtitle = par("cex"),
cex.legtext = par("cex"), ncircles = 6, ndigits = 1,
interactive = TRUE, ...) {
check_data(x)
x <- as.data.frame(x)
# FIXME: infinite values
# code is based on StatDA::bubbleFIN()
# ncircles ... number of circles for the legend
# ndigits ... number of digits for the legend
# error messages
imputed <- FALSE # indicates if there are Variables with missing-index
if(is.null(dim(x))) {
nx <- length(x)
px <- 1
} else {
if(!inherits(x, c("data.frame","matrix"))) {
stop("'x must be a data.frame or matrix")
}
## delimiter ##
if(!is.null(delimiter)) {
tmp <- grep(delimiter, colnames(x)) # Position of the missing-index
if(length(tmp) > 0) {
imp_var <- x[, tmp, drop=FALSE]
x <- x[, -tmp, drop=FALSE]
if(ncol(x) == 0) stop("Only the missing-index is given")
if(is.matrix(imp_var) && range(imp_var) == c(0,1)) imp_var <- apply(imp_var,2,as.logical)
if(is.null(dim(imp_var))) {
if(!is.logical(imp_var)) stop("The missing-index of imputed Variables must be of the type logical")
} else {
if(!any(as.logical(lapply(imp_var,is.logical)))) stop("The missing-index of imputed Variables must be of the type logical")
}
imputed <- TRUE
} else {
warning("'delimiter' is given, but no missing-index-Variable is found", call. = FALSE)
}
}
nx <- nrow(x)
px <- ncol(x)
if(px == 0) stop("'x' has no columns")
}
if(!(inherits(coords, c("data.frame","matrix")))) {
stop("'coords' must be a data.frame or matrix")
}
if(ncol(coords) != 2) stop("'coords' must be 2-dimensional")
if(nx != nrow(coords)) {
stop("'x' and 'coords' must have equal number of elements/rows")
}
# if(length(col) == 0) col <- c("skyblue","red","red4")
# else if(length(col) == 1) {
# border <- c(col, "transparent", "transparent")
# col <- c("transparent", col, col)
# } else if(length(col) == 2) col <- rep(col, 1:2)
# else if(length(col) > 3) col <- col[1:3]
# if(length(border) == 0) border <- par("bg")
# else if(length(border) == 1) border <- rep.int(border, 3)
# else if(length(border) == 2) border <- rep(border, 1:2)
# else if(length(border) > 3) border <- border[1:3]
if(length(col) == 0) col <- c("skyblue", "red", "skyblue4", "red4", "orange", "orange4")
else if(length(col) == 1) {
border <- rep.int(c(col, "transparent"), 2)
col <- c(rep.int(c("transparent", col), 2),rep.int(col,2))
} else if(length(col) == 3 || length(col) == 5) col <- rep.int(col[1:2], 3)
else if(length(col) != 6) col <- rep(col, length.out=6)
if(length(border) == 0) border <- par("bg")
else if(length(border) == 1) lty <- rep.int(border, 4)
else if(length(border) == 3) border <- rep.int(border[1:2], 2)
else if(length(border) != 4) border <- rep(border, length.out=4)
coords <- as.data.frame(coords)
if(px > 1) {
if(!is.numeric(pos) || length(pos) != 1 || (px < pos)) {
stop("'pos' must be an integer specifying one column of 'x'")
}
selection <- match.arg(selection)
}
if(!is.null(alpha)) {
col <- alphablend(col, alpha) # semitransparent colors
border <- alphablend(border, alpha) # semitransparent borders
}
# initialize plot
bgmap(map, col=col.map, ...)
if(px == 1) {
if(!imputed) missPos <- is.na(x) # indicates missings in plot variable
else missPos <- isImp(x, pos = NULL, delimiter = delimiter, imp_var = imp_var, selection = selection)[["missh"]]
missOther <- rep.int(FALSE, nx)
z <- as.numeric(x[!missPos]) # observed values in plot variable
miss <- rep.int(FALSE, length(z))
} else {
if(!imputed) {
missPos <- is.na(x[, pos]) # indicates missings in plot variable
missOther <- isNA(x[, -pos, drop=FALSE], selection)
z <- as.numeric(x[!missPos, pos]) # observed values in plot variable
miss <- isNA(x[!missPos, -pos, drop=FALSE], selection)
} else {
tmp <- isImp(x, pos = pos, delimiter = delimiter, imp_var = imp_var, selection = selection)
missPos <- tmp[["misspos"]]
missOther <- tmp[["missh"]]
z <- as.numeric(x[!missPos, pos]) # observed values in plot variable
miss <- isImp(x[!missPos, -pos ,drop=FALSE], pos = NULL, delimiter = delimiter, imp_var = imp_var[!missPos,], selection = selection)[["missh"]]
}
}
if(log) {
if(any(z < 0)) stop("cannot use logarithm with negative values")
z <- log10(z)
}
if(is.null(size)) { # default size depends on area and sample density
# retrieve bounding box for background map
usr <- par("usr")
xr <- usr[1:2]
if(par("xaxs") == "r") xr <- xr + c(1,-1)*diff(xr)*0.04/1.08
yr <- usr[3:4]
if(par("yaxs") == "r") yr <- yr + c(1,-1)*diff(yr)*0.04/1.08
# area of bounding box
Abox <- diff(xr)*diff(yr)
maxsize <- sqrt(Abox/nx)
size <- c(maxsize/10, maxsize)
scale <- NULL
}
if(length(z)) {
mnz <- min(z)
zz <- if(mnz < 0) z + abs(mnz) else z
q1 <- quantile(zz, 0.1)
q2 <- quantile(zz, 0.99)
c <- q1 / (q2/q1)^(exp[1]/exp[2])
C <- q2 / (q1/q2)^(exp[3]/exp[2])
xi <- pmax(pmin(zz,C), c)
di <- size[1] * (size[2]/size[1])^(log10(xi/c)/log10(C/c))
if(!is.null(scale)) di <- scale * di
coordsobs <- coords[!(missPos | missOther),]
diobs <- di[!miss]
ordobs <- order(z[!miss], decreasing=TRUE)
circles(coordsobs[ordobs, 1], coordsobs[ordobs, 2],
diobs[ordobs]/2, col=col[1], border=border[1])
# observations with missings in other variables
coordsmiss <- coords[!missPos & missOther,]
dimiss <- di[miss]
ordmiss <- order(z[miss], decreasing=TRUE)
if(!imputed) color <- col[2]
else color <- col[5]
circles(coordsmiss[ordmiss, 1], coordsmiss[ordmiss, 2],
dimiss[ordmiss]/2, col=color, border=border[2])
}
# missings in plot variable
if(any(missPos)) {
# sqx <- (C+c)/2
# s <- size[1] * (size[2]/size[1])^(log10(sqx/c)/log10(C/c)) / 2
s <- mean(size) * 0.35
if(!is.null(scale)) s <- scale * s
cp <- coords[missPos & !missOther, , drop=FALSE]
co <- coords[missPos & missOther, , drop=FALSE]
rect(cp[,1]-s, cp[,2]-s, cp[,1]+s, cp[,2]+s,
col=col[3], border=border[3])
if(!imputed) color <- col[4]
else color <- col[6]
rect(co[,1]-s, co[,2]-s, co[,1]+s, co[,2]+s,
col=color, border=border[4])
}
# add legend (top right)
if(length(z) && legend) {
probs <- seq(1, 0, length.out=ncircles)
diq <- quantile(di, probs=probs)
#zq <- quantile(x[!missPos, pos], probs=probs)
if(px == 1) zq <- quantile(x[!missPos], probs=probs)
else zq <- quantile(x[!missPos, pos], probs=probs)
lsheight <- strheight(legtitle, cex=cex.legtitle)
legtext <- format(zq, digits=ndigits)
maxsheight <- max(strheight(legtext, cex=cex.legtext))
maxswidth <- max(strwidth(legtext, cex=cex.legtext))
xmax <- max(coords[,1])
ymax <- max(coords[,2])
xt <- xmax - maxswidth
xc <- xt - max(diq)
yc <- ymax - lsheight*2
yc <- c(yc, yc - maxsheight*1.5*(1:(length(diq)-1)))
circles(xc, yc, diq/2, col=col[1], border=border[1])
text(xt, yc, legtext, adj=0, cex=cex.legtext)
lswidth <- strwidth(legtitle, cex=cex.legtitle)
tswidth <- max(diq)*2 + maxswidth
if(lswidth > tswidth)
text(xmax, ymax, legtitle, adj=1, cex=cex.legtitle)
else text(xmax-tswidth, ymax, legtitle, adj=0, cex=cex.legtitle)
}
if(interactive) {
cat("\nClick on a point to get more information.\n")
cat(paste("To regain use of the VIM GUI and the R console,",
"click in a region that does not contain any points.\n\n"))
identifyPt <- function(p, x) { # function to identify closest point
if(is.null(p) || nrow(x) == 0) return(NA)
d <- sqrt(colSums((t(x)-p)^2))
m <- min(d, na.rm=TRUE)
r <- apply(x,2,range, na.rm=TRUE)
r <- max(r[2,]-r[1,])
if(m/r < 0.05) which(d == min(d, na.rm=TRUE))
else NA
}
pt <- locatorVIM()
ind <- identifyPt(unlist(pt), coords) # get closest point
while(!is.na(ind)) {
# print(x[ind,])
if(px == 1) print(x[ind]) # print values for
else print(x[ind,]) # the identified point
pt <- locatorVIM()
ind <- identifyPt(unlist(pt), coords)
}
}
invisible()
}
# compatibility wrapper
bubbleMiss <- function(...) {
growdotMiss(...)
}
# modified version of Peter Filzmoser's function in package 'StatDA'
circles <- function(x, y, radius, col=NA, border=par("fg")) {
#draw circles
nmax <- max(length(x), length(y));
if (length(x) < nmax) x <- rep(x, length=nmax);
if (length(y) < nmax) y <- rep(y, length=nmax);
if (length(col) < nmax) col <- rep(col, length=nmax);
if (length(border) < nmax) border <- rep(border, length=nmax);
if (length(radius) < nmax) radius <- rep(radius, length=nmax);
theta <- 2* pi * seq(0, 355, by=5) / 360;
ct <- cos(theta);
st <- sin(theta);
#for(i in 1:nmax)
# polygon(x[i] + ct * radius[i], y[i] + st * radius[i],
# col=col[i], border=border[i]);
xmat <- mapply(function(x,r,c) x+c*r, x, radius, MoreArgs=list(ct))
ymat <- mapply(function(y,r,s) y+s*r, y, radius, MoreArgs=list(st))
xvec <- as.vector(rbind(xmat, rep(NA, length(x))))
yvec <- as.vector(rbind(ymat, rep(NA, length(y))))
polygon(xvec, yvec, col=col, border=border)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.