ci2d: Create 2-dimensional empirical confidence regions
In r-gregmisc/gplots: Various R Programming Tools for Plotting Data

Description Usage Arguments Details Value Note Author(s) See Also Examples

Create 2-dimensional empirical confidence regions from provided data.

ci2d(x, y = NULL,
     nbins=51, method=c("bkde2D","hist2d"),
     bandwidth, factor=1.0,
     ci.levels=c(0.50,0.75,0.90,0.95,0.975),
     show=c("filled.contour","contour","image","none"),
     col=topo.colors(length(breaks)-1),
     show.points=FALSE,
     pch=par("pch"),
     points.col="red",
     xlab, ylab, 
     ...)
## S3 method for class 'ci2d'
print(x, ...)

`x`	either a vector containing the x coordinates or a matrix with 2 columns.
`y`	a vector contianing the y coordinates, not required if ‘x’ is matrix
`nbins`	number of bins in each dimension. May be a scalar or a 2 element vector. Defaults to 51.
`method`	One of "bkde2D" (for KernSmooth::bdke2d) or "hist2d" (for gplots::hist2d) specifyting the name of the method to create the 2-d density summarizing the data. Defaults to "bkde2D".
`bandwidth`	Bandwidth to use for `KernSmooth::bkde2D`. See below for default value.
`factor`	Numeric scaling factor for bandwidth. Useful for exploring effect of changing the bandwidth. Defaults to 1.0.
`ci.levels`	Confidence level(s) to use for plotting data. Defaults to `c(0.5, 0.75, 0.9, 0.95, 0.975)`
`show`	Plot type to be displaed. One of "filled.contour", "contour", "image", or "none". Defaults to "filled.contour".
`show.points`	Boolean indicating whether original data values should be plotted. Defaults to `TRUE`.
`pch`	Point type for plots. See `points` for details.
`points.col`	Point color for plotting original data. Defaiults to "red".
`col`	Colors to use for plots.
`xlab, ylab`	Axis labels
`...`	Additional arguments passed to `KernSmooth::bkde2D` or `gplots::hist2d`.

This function utilizes either KernSmooth::bkde2D or gplots::hist2d to estmate a 2-dimensional density of the data passed as an argument. This density is then used to create and (optionally) display confidence regions.

When bandwidth is ommited and method="bkde2d", KernSmooth::dpik is appled in x and y dimensions to select the bandwidth.

A ci2d object consisting of a list containing (at least) the following elements:

`nobs`	number of original data points
`x`	x position of each density estimate bin
`y`	y position of each density estimate bin
`density`	Matrix containing the probability density of each bin (count in bin/total count)
`cumDensity`	Matrix where each element contains the cumulative probability density of all elements with the same density (used to create the confidence region plots)
`contours`	List of contours of each confidence region.
`call`	Call used to create this object

Confidence intervals generated by ci2d are approximate, and are subject to biases and/or artifacts induced by the binning or kernel smoothing method, bin locations, bin sizes, and kernel bandwidth.

The conf2d function in the r2d2 package may create a more accurate confidence region, and reports the actual proportion of points inside the region.

Gregory R. Warnes greg@warnes.net

bkde2D, conf2d, dpik, hist2d

   ####
   ## Basic usage 
   ####
   data(geyser, package="MASS")

   x <- geyser$duration
   y <- geyser$waiting

   # 2-d confidence intervals based on binned kernel density estimate
   ci2d(x,y)                   # filled contour plot
   ci2d(x,y, show.points=TRUE) # show original data


   # image plot
   ci2d(x,y, show="image")
   ci2d(x,y, show="image", show.points=TRUE)

   # contour plot
   ci2d(x,y, show="contour", col="black")
   ci2d(x,y, show="contour", col="black", show.points=TRUE)

   ####
   ## Control Axis scales
   ####
   x <- rnorm(2000, sd=4)
   y <- rnorm(2000, sd=1)

   # 2-d confidence intervals based on binned kernel density estimate
   ci2d(x,y)

   # 2-d confidence intervals based on 2d histogram
   ci2d(x,y, method="hist2d", nbins=25)
 
   # Require same scale for each axis, this looks oval
   ci2d(x,y, range.x=list(c(-20,20), c(-20,20)))
   ci2d(x,y, method="hist2d", same.scale=TRUE, nbins=25) # hist2d 

   ####
   ## Control smoothing and binning 
   ####
   x <- rnorm(2000, sd=4)
   y <- rnorm(2000, mean=x, sd=2)

   # Default 2-d confidence intervals based on binned kernel density estimate
   ci2d(x,y)

   # change the smoother bandwidth
   ci2d(x,y,
        bandwidth=c(sd(x)/8, sd(y)/8)
       )

   # change the smoother number of bins
   ci2d(x,y, nbins=10)
   ci2d(x,y)
   ci2d(x,y, nbins=100)

   # Default 2-d confidence intervals based on 2d histogram
   ci2d(x,y, method="hist2d", show.points=TRUE)

   # change the number of histogram bins
   ci2d(x,y, nbin=10, method="hist2d", show.points=TRUE )
   ci2d(x,y, nbin=25, method="hist2d", show.points=TRUE )

   ####
   ## Perform plotting manually
   ####
   data(geyser, package="MASS")

   # let ci2d handle plotting contours...
   ci2d(geyser$duration, geyser$waiting, show="contour", col="black")

   # call contour() directly, show the 90 percent CI, and the mean point 
   est <- ci2d(geyser$duration, geyser$waiting, show="none")
   contour(est$x, est$y, est$cumDensity,
           xlab="duration", ylab="waiting",
           levels=0.90, lwd=4, lty=2)
   points(mean(geyser$duration), mean(geyser$waiting),
         col="red", pch="X")


   ####
   ## Extract confidence region values
   ###
   data(geyser, package="MASS")

   ## Empirical 90 percent confidence limits
   quantile( geyser$duration, c(0.05, 0.95) )
   quantile( geyser$waiting, c(0.05, 0.95) )

   ## Bivariate 90 percent confidence region
   est <- ci2d(geyser$duration, geyser$waiting, show="none")
   names(est$contours) ## show available contours

   ci.90 <- est$contours[names(est$contours)=="0.9"]  # get region(s)
   ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them

   print(ci.90)                  # show full contour
   range(ci.90$x, na.rm=TRUE)    # range for duration
   range(ci.90$y, na.rm=TRUE)    # range for waiting

   ####
   ## Visually compare confidence regions 
   ####
   data(geyser, package="MASS")

   ## Bivariate smoothed 90 percent confidence region
   est <- ci2d(geyser$duration, geyser$waiting, show="none")
   names(est$contours) ## show available contours

   ci.90 <- est$contours[names(est$contours)=="0.9"]  # get region(s)
   ci.90 <- rbind(ci.90[[1]],NA, ci.90[[2]], NA, ci.90[[3]]) # join them

   plot( waiting ~ duration, data=geyser,
         main="Comparison of 90 percent confidence regions" )
   polygon( ci.90, col="green", border="green", density=10)

   ## Univariate Normal-Theory 90 percent confidence region
   mean.x <- mean(geyser$duration)
   mean.y <- mean(geyser$waiting)
   sd.x <- sd(geyser$duration)
   sd.y <- sd(geyser$waiting)

   t.value <- qt(c(0.05,0.95), df=gdata::nobs(geyser$duration), lower=TRUE)
   ci.x <- mean.x +  t.value* sd.x
   ci.y <- mean.y +  t.value* sd.y

   plotCI(mean.x, mean.y,
          li=ci.x[1],
          ui=ci.x[2],
          barcol="blue", col="blue",
          err="x",
          pch="X",
          add=TRUE )

   plotCI(mean.x, mean.y,
          li=ci.y[1],
          ui=ci.y[2],
          barcol="blue", col="blue",
          err="y",
          pch=NA,
          add=TRUE )

#   rect(ci.x[1], ci.y[1], ci.x[2], ci.y[2], border="blue",
#        density=5,
#        angle=45,
#        col="blue" )


   ## Empirical univariate 90 percent confidence region
   box <- cbind( x=quantile( geyser$duration, c(0.05, 0.95 )), 
                 y=quantile( geyser$waiting, c(0.05, 0.95 )) )

   rect(box[1,1], box[1,2], box[2,1], box[2,2], border="red",
        density=5,
        angle=-45,
        col="red" )

   ## now a nice legend
   legend( "topright", legend=c("       Region type",
                                "Univariate Normal Theory",
                                "Univarite Empirical",
                                "Smoothed Bivariate"),
           lwd=c(NA,1,1,1),
           col=c("black","blue","red","green"),
           lty=c(NA,1,1,1)
         )

   ####
   ## Test with a large number of points
   ####
   ## Not run: 
   x <- rnorm(60000, sd=1)
   y <- c( rnorm(40000, mean=x, sd=1),
           rnorm(20000, mean=x+4, sd=1) )

   hist2d(x,y)
   ci <- ci2d(x,y)
   ci
   
## End(Not run)