R/meanExcessPlot.R
In laeken: Estimation of Indicators on Social Exclusion and Poverty

Documented in meanExcessPlot

# ----------------------------------------
# Authors: Josef Holzer and Andreas Alfons
#          Vienna University of Technology
# ----------------------------------------

#' Mean excess plot
#'
#' The Mean Excess plot is a graphical method for detecting the threshold (scale
#' parameter) of a Pareto distribution.
#'
#' The corresponding mean excesses are plotted against the values of \code{x}
#' (if supplied, only those specified by \code{probs}).  If the tail of the data
#' follows a Pareto distribution, these observations show a positive linear
#' trend. The leftmost point of a fitted line can thus be used as an estimate of
#' the threshold (scale parameter).
#'
#' The interactive selection of the threshold (scale parameter) is implemented
#' using \code{\link[graphics]{identify}}.  For the usual \code{X11} device, the
#' selection process is thus terminated by pressing any mouse button other than
#' the first.  For the \code{quartz} device (on Mac OS X systems), the process
#' is terminated either by a secondary click (usually second mouse button or
#' \code{Ctrl}-click) or by pressing the \code{ESC} key.
#'
#' @param x a numeric vector.
#' @param w an optional numeric vector giving sample weights.
#' @param probs an optional numeric vector of probabilities with values in
#' \eqn{[0,1]}, defining the quantiles to be plotted.  This is useful for large
#' data sets, when it may not be desirable to plot every single point.
#' @param interactive a logical indicating whether the threshold (scale
#' parameter) can be selected interactively by clicking on points.  Information
#' on the selected threshold is then printed on the console.
#' @param pch,cex,col,bg graphical parameters for the plot symbol of each data
#' point or quantile (see \code{\link[graphics]{points}}).
#' @param \dots additional arguments to be passed to
#' \code{\link[graphics]{plot.default}}.
#'
#' @return If \code{interactive} is \code{TRUE}, the last selection for the
#' threshold is returned invisibly as an object of class \code{"paretoScale"},
#' which consists of the following components:
#' \item{x0}{the selected threshold (scale parameter).}
#' \item{k}{the number of observations in the tail (i.e., larger than the
#' threshold).}
#'
#' @note The functionality to account for sample weights and to select the
#' threshold (scale parameter) interactively was introduced in version 0.2.
#'
#' @author Andreas Alfons and Josef Holzer
#'
#' @seealso \code{\link{paretoScale}}, \code{\link{paretoTail}},
#' \code{\link{minAMSE}}, \code{\link{paretoQPlot}},
#' \code{\link[graphics]{identify}}
#'
#' @keywords hplot
#'
#' @examples
#' data(eusilc)
#' # equivalized disposable income is equal for each household
#' # member, therefore only one household member is taken
#' eusilc <- eusilc[!duplicated(eusilc$db030),]
#'
#' # with sample weights
#' meanExcessPlot(eusilc$eqIncome, w = eusilc$db090)
#'
#' # without sample weights
#' meanExcessPlot(eusilc$eqIncome)
#'
#' @importFrom graphics identify abline par plot
#' @importFrom stats quantile weighted.mean
#' @export

meanExcessPlot <- function(x, w = NULL, probs = NULL, interactive = TRUE,
        pch = par("pch"), cex = par("cex"), col = par("col"),
        bg = "transparent", ...) {
    ## initializations
    n <- length(x)
    if(!is.numeric(x) || n == 0) stop("'x' must be a numeric vector")
    if(!is.null(w)) {
        if(!is.numeric(w) || length(w) != n) {
            stop("'w' must be numeric vector of the same length as 'x'")
        }
        if(any(w < 0)) stop("negative weights in 'w'")
    }
    haveProbs <- !is.null(probs)
    if(haveProbs) n <- length(probs)
    if(length(pch) > 1) pch <- rep(pch, length.out=n)
    if(length(cex) > 1) cex <- rep(cex, length.out=n)
    if(length(col) > 1) col <- rep(col, length.out=n)
    if(length(bg) > 1) bg <- rep(bg, length.out=n)
    if(any(i <- is.na(x))) {  # remove missing values
        x <- x[!i]
        if(!is.null(w)) w <- w[!i]
        if(!haveProbs) {
            if(length(pch) > 1) pch <- pch[!i]
            if(length(cex) > 1) cex <- cex[!i]
            if(length(col) > 1) col <- col[!i]
            if(length(bg) > 1) bg <- bg[!i]
            n <- length(x)
        }
        if(length(x) == 0) stop("no observed values")
    }
    ## use observed values or quantiles as thresholds
    if(haveProbs) {
        if(is.null(w)) {  # no weights
            mu <- quantile(x, probs, names=FALSE, type=1)  # compute quantiles
        } else {  # weights are supplied
            mu <- weightedQuantile(x, w, probs)  # compute weighted quantiles
        }
        if(max(mu) >= max(x)) stop("largest threshold too high")
    } else {
        order <- order(x)
        keep <- seq_len(n-sqrt(n))
        mu <- unname(x[order][keep])
        if(length(pch) > 1) pch <- pch[order][keep]
        if(length(cex) > 1) cex <- cex[order][keep]
        if(length(col) > 1) col <- col[order][keep]
        if(length(bg) > 1) bg <- bg[order][keep]
    }
    ## compute mean excesses for the different thresholds
    # this could be done much faster with C (incremental computation)
    if(is.null(w)) meanExcess <- function(mu) mean(x[x > mu] - mu)
    else {
        meanExcess <- function(mu) {
            i <- x > mu
            weighted.mean(x[i] - mu, w[i])
        }
    }
    me <- sapply(mu, meanExcess)
    ## create plot
    localPlot <- function(x, y, main = "Mean excess plot",
            xlab = "Threshold", ylab = "Mean excess", ...) {
        plot(x, y, main=main, xlab=xlab, ylab=ylab, ...)
    }
    localPlot(mu, me, pch=pch, cex=cex, col=col, bg=bg, ...)
    ## interactive identification of threshold
    res <- NULL
    if(isTRUE(interactive)) {
        nextIndex <- identify(mu, me, n=1, plot=FALSE)
        i <- 1
        while(!identical(nextIndex, integer())) {
            index <- nextIndex
            x0 <- mu[index]
            res <- list(x0=x0, k=length(which(x > x0)))
            class(res) <- "paretoScale"
            if(i > 1) cat("\n")
            print(res)
            nextIndex <- identify(mu, me, n=1, plot=FALSE)
            i <- i + 1
        }
        # indicate selected threshold by horizontal and vertical lines
        if(!is.null(res)) {
            abline(h=me[index], lty=3)
            abline(v=x0, lty=3)
        }
    }
    ## return result invisibly
    invisible(res)
}