R/mquantile.R

Defines functions `cond_cdf` `joint_cdf`

#' @title Conditional and joint multivariate quantiles
#'
#' @description
#' Quantiles of observations in multivariate space.
#'
#' @param x vector, matrix, or data.frame of observations.
#'
#' @param pltype character, plot type, one of
#'      \code{c('none','cdf','pairs','rgl','persp')}.
#'
#' @param ngrid vector, number of grid points.
#'
#' @param ... further arguments passed to other functions.
#'
#' @return
#' Plots to device, or else object of class \code{'zzz'}.
#'
#' @details
#' Where:\cr
#'      X \verb{    } = n x m matrix,\cr
#'      EPDF \verb{} = empirical probability distribution function
#'      (density), and\cr
#'      ECDF \verb{} = empirical cumulative distribution function;
#'
#'
#'      Then three possible multivariate quantiles are:
#'
#'      [1] Marginal quantile: from ECDF of raw data for EACH axis
#'      independently (so yields m separate vectors each of length
#'      n).
#'
#'      [2] Joint quantile: from ECDF of raw data across ALL m axes
#'      simultaneously (so yields 1 vector of length n); always
#'      monotonically increasing toward higher axis values.
#'
#'      [3] Conditional quantile: from the ECDF of the EPDF of ALL m
#'      axes simultaneously (so yields 1 vector of length n);
#'      monotonically increasing toward lower \emph{density} values,
#'      but may vary with respect to \emph{axis} values.
#'
#' Conditional quantiles are analogous to data depth methods; current
#'      implementation allows 1-6 dimensions. Joint quantiles are
#'      analogous to Pareto frontiers; current implementation allows
#'      1-3 dimensions.
#'
#' @examples
#' # iris data
#' x <- iris[,1:3]
#' cond_cdf(x, 'pairs')
#' joint_cdf(x, 'pairs')
#' cond_cdf(x, 'rgl')
#' joint_cdf(x, 'rgl')
#'
#' # dustbunny data
#' set.seed(23)
#' x <- data.frame(q = rnorm(99,0,5)^2,
#'                 r = rnorm(99,0,5)^2,
#'                 s = rnorm(99,10,10)^2)
#' cond_cdf(x, 'pairs')
#' joint_cdf(x, 'pairs')
#' cond_cdf(x, 'rgl')
#' joint_cdf(x, 'rgl')
#'
#' @export
#' @rdname mquantile
###   conditional CDF: percentiles of an EPDF across 1-6 dimensions
`cond_cdf` <- function(x, pltype, ngrid, ...){
     if (!is.matrix(x)) x <- as.matrix(x)
     na <- anyNA(x)
     if (na){
          nr <- nrow(x)
          x  <- na.omit(x)
          ix <- attr(x, 'na.action')
     }
     nc <- dim(x)[[2]]
     pp <- c('none','cdf','pairs','rgl','persp')
     if (missing(pltype)) {
          pltype <- 'none'
     } else {
          pltype <- pp[pmatch(pltype, pp)]
     }
     if (pltype=='persp' & nc==2){
          if (missing(ngrid))  ngrid <- 44
          f <- ks::kde(x=x, gridsize=ngrid)
          persp(f$estimate, xlab=f$names[1], ylab=f$names[2],
                zlab='Prob. density',
                col=surfcol(f$estimate, ngrid=ngrid, ...), ...)
     }
     if (pltype=='persp' & nc>2){
          message('more than 2 columns, using `pairs` not `persp`')
          pltype <- 'pairs'
     }
     f <- ks::kde(x=x, eval.points=x)  # EPDF of orig pts
     z <- f$estimate               # density values of raw data
     if (na){                       # put values in appropriate rows
          v  <- vector('numeric', nr)
          v[ 1:nr %in% ix] <- NA
          v[!1:nr %in% ix] <- z
          z <- v
          m <- matrix(NA, nrow=nr, ncol=nc)
          m[!1:nr %in% ix, ] <- as.matrix(f$eval.points)
          f$eval.points <- as.data.frame(m)
     }
     e <- ecdf(z)   # ECDF  of the *DENSITY* values! (not raw data)
     p <- e(z)      # %iles of the *DENSITY* values! (not raw data)
     p <- (1-p)     # take complement (higher values more 'extreme')
     o <- as.data.frame(cbind(f$eval.points, den=z, p=p))
     if (pltype=='none'){
          return(o)
     }
     ccc <- colvec(p, ...)
     if (pltype=='cdf'){
          plot(z, p, pch=16, col=ccc, bty='l',
               las=1, xlab='Density values',
               ylab='Joint percentile of\ndensity values ')
     }
     if (pltype=='rgl' & nc==3){
          rgl::plot3d(o[,1], o[,2], o[,3], pch=16, size=10, col=ccc,
                      xlab=f$names[1], ylab=f$names[2],
                      zlab=f$names[3], box=F)
     } else {
          if (pltype=='rgl' & nc!=3){
               cat('`rgl` needs 3 columns, plotting `pairs` instead')
               pltype <- 'pairs'
          }
     }
     if (pltype=='pairs' & nc>2){
          pairs(f$eval.points, upper.panel=NULL, pch=16, col=ccc, ...)
     }else{
          if (pltype=='pairs' & nc<=2){
               plot(f$eval.points, pch=16, col=ccc, ...)
          }
     }
}
#' @export
#' @rdname mquantile
###   joint CDF: over ALL dimensions simultaneously for 1-3 dimensions
`joint_cdf` <- function(x, pltype, ngrid, ...){
     if (!is.matrix(x)) x <- as.matrix(x)
     nc <- dim(x)[[2]]
     if (nc > 3) stop('only works for 1-3 dimensions')
     na <- anyNA(x)
     if (na){
          nr <- nrow(x)
          x  <- na.omit(x)
          ix <- attr(x, 'na.action')
     }
     pp <- c('none','cdf','pairs','rgl','persp')
     if (missing(pltype)) {
          pltype <- 'none'
     } else {
          pltype <- pp[pmatch(pltype, pp)]
     }
     if (pltype=='persp' & nc==2){
          if (missing(ngrid))  ngrid <- 44
          f <- ks::kcde(x=x, gridsize=ngrid)
          persp(f$estimate,
                col=surfcol(f$estimate, ngrid=ngrid, ...), ...)
     }
     if (pltype=='persp' & nc>2){
          message('more than 2 columns, using `pairs` not `persp`')
          pltype <- 'pairs'
     }
     f <- ks::kcde(x=x, eval.points=x)
     p <- f$estimate  # %iles of raw data, higher values more extreme
     if (na){                       # put values in appropriate rows
          v  <- vector('numeric', nr)
          v[ 1:nr %in% ix] <- NA
          v[!1:nr %in% ix] <- p
          p <- v
          m <- matrix(NA, nrow=nr, ncol=nc)
          m[!1:nr %in% ix, ] <- as.matrix(f$eval.points)
          f$eval.points <- as.data.frame(m)
     }
     o <- as.data.frame(cbind(f$eval.points, p=p))
     if (pltype=='none'){
          return(o)
     }
     ccc <- colvec(p, ...)
     if (pltype=='cdf'){
          auto_rowcol <- function(n = nc) {
               if (n <= 3)
                    c(1, n)
               else if (n <= 6)
                    c(2, (n + 1)%/%2)
               else if (n <= 12)
                    c(3, (n + 2)%/%3)
               else c(ceiling(n/(nr <- ceiling(sqrt(n)))), nr)
          }
          op <- par(mfrow=auto_rowcol())
          for(i in 1:nc){
               plot(x[,i], p, pch=16, col=ccc, bty='l', las=1,
                    xlab=f$names[i], ylab='Marginal percentile')
          }
          par(op)
     }
     if (pltype=='rgl' & nc==3){
          rgl::plot3d(o[,1],o[,2],o[,3], pch=16, size=10, col=ccc,
                      xlab=f$names[1], ylab=f$names[2],
                      zlab=f$names[3], box=F)
     } else {
          if (pltype=='rgl' & nc!=3){
               cat('`rgl` needs 3 columns, plotting `pairs` instead')
               pltype <- 'pairs'
          }
     }
     if (pltype=='pairs' & nc>2){
          pairs(f$eval.points, upper.panel=NULL, pch=16, col=ccc, ...)
     }else{
          if (pltype=='pairs' & nc<=2){
               plot(f$eval.points, pch=16, col=ccc, ...)
          }
     }
}
phytomosaic/vuln documentation built on Sept. 21, 2019, 8:23 a.m.