R/depth.R

#'@title Depth calculation
#'
#' @details Calculate depth functions.
#'
#'@export
#'
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param method Character string which determines the depth function. \code{method} can be "Projection" (the default), "Mahalanobis", "Euclidean" or "Tukey". For details see \code{\link{depth}.}
#'  @param name name for this data set - it will be used on plots.
#'  @param threads number of threads used in parallel computations. Default value -1 means that all possible cores will be used. 
#'  @param ... parameters specific to method - see \code{\link{depthEuclid}}
#'
#' @details
#'
#' {the Mahalanobis depth}  \deqn{ {D}_{MAH}(y,{X}^{n})=\frac{1}{1+{{(y-\bar{x})}^{T}}{{S}^{-1}}(y-\bar{x})}, }    where  \eqn{ S }  denotes the sample covariance matrix  \eqn{ {X}^{n} } .  
#' 
#' #' A symmetric projection depth  \eqn{ D\left( x,X \right) }  of a point  \eqn{ x\in {{{R}}^{d}} } ,  \eqn{ d\ge 1 }  is defined as  \eqn{ D\left( x,X \right)_{PRO}={{\left[ 1+su{{p}_{\left\| u \right\|=1}}\frac{\left| {{u}^{T}}x-Med\left( {{u}^{T}}X \right) \right|}{MAD\left( {{u}^{T}}X \right)} \right]}^{-1}}, }    where Med denotes the univariate median,  \eqn{ MAD\left( Z \right) }  =  \eqn{ Med\left( \left| Z-Med\left( Z \right)                                                                              \right| \right) } . Its sample version denoted by  \eqn{ D\left( x,{X}^{n} \right) }  or  \eqn{ D\left( x,{X}^{n} \right) }  is obtained by replacing  \eqn{ F }   by its empirical counterpart  \eqn{ {{F}_{n}} }  calculated from the sample  \eqn{ {X}^{n} } . 
#' 
#' Next interesting depth is the weighted  \eqn{ {L}^{p} }  depth. The weighted  \eqn{ {L}^{p} }  depth  \eqn{ D({x},F) }  of a point  \eqn{ {x}\in {R}^{d} } , \eqn{ d\ge 1 }  generated by  \eqn{ d }   dimensional random vector  \eqn{ {X} }  with distribution  \eqn{ F } ,  is defined as  \eqn{ D({x},F)=\frac{1}{1+Ew({{\left\| x-X \right\|}_{p}})}, }    where  \eqn{ w }  is a suitable weight function on  \eqn{ [0,\infty ) }  , and  \eqn{ {{\left\| \cdot  \right\|}_{p}} }  stands for the  \eqn{ {L}^{p} }  norm (when p=2 we have usual Euclidean norm). We assume that  \eqn{ w }  is non-decreasing and continuous on  \eqn{ [0,\infty ) }  with  \eqn{ w(\infty -)=\infty  } , and for  \eqn{ a,b\in {{{R}}^{d}} }  satisfying  \eqn{ w(\left\| a+b \right\|)\le w(\left\| a \right\|)+w(\left\| b \right\|) } . Examples of the weight functions are:    \eqn{ w(x)=a+bx }  ,  \eqn{ a,b>0 }  or  \eqn{ w(x)={x}^{\alpha } } . The empirical version of the weighted  \eqn{ {L}^{p} }  depth is obtained by replacing distribution  \eqn{ F }    of  \eqn{ {X} }  in  \eqn{ Ew({{\left\| {x}-{X} \right\|}_{p}})=\int{w({{\left\| x-t \right\|}_{p}})}dF(t) }  by its empirical counterpart calculated from the sample  \eqn{ {{{X}}^{n}} }  ....  
#'  
#' 
#' The Projection and Tukey's depths are calculated using an approximate algorithm. Calculations of Mahalanobis, Euclidean and  \eqn{ L^p }  depths are exact. Returns the depth of multivariate point  u  with respect to data set  X.
#'
#' 
#'  @references 
#'  
#' Liu, R.Y., Parelius, J.M. and Singh, K. (1999), Multivariate analysis by data depth: Descriptive statistics, graphics and inference (with discussion),  Ann. Statist.,  27, 783-858.
#' 
#' Mosler K (2013). Depth statistics. In C Becker, R Fried, K S (eds.), Robustness and Complex Data Structures, Festschrift in Honour of Ursula Gather, pp. 17-34. Springer.
#'  
#' Rousseeuw, P.J. and Struyf, A. (1998), Computing location depth and regression depth in higher dimensions,  Stat. Comput.,  8, 193-203.
#' 
#' Zuo, Y. and Serfling, R. (2000), General Notions of Statistical Depth Functions,  Ann. Statist.,  28, no. 2, 461-482.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  @seealso \code{\link{depthContour}} and \code{\link{depthPersp}} for depth graphics.
#'  
#'  @examples
#'  require(robustbase)
#'  
#'  ## Calculation of Projection depth
#'  data(starsCYG, package = "robustbase")
#'  depth(t(colMeans(starsCYG)), starsCYG)
#'  
#'  #Aslo for matrices
#'  depth(starsCYG, starsCYG)
#'  
#'  ## Projection depth applied to a large bivariate data set
#'  x = matrix(rnorm(9999), nc = 3)
#'  depth(x, x)
#'    
#'  @keywords
#'  multivariate
#'  nonparametric
#'  robust
#'  depth function
#'  
#'
depth = function(u, X, method="Projection", name = "X", threads = -1,...)
{  
  if(is.data.frame(u)) u = as.matrix(u)
  if(is.vector(u)) u = matrix(u,ncol = dim(X)[2])
  if(missing(X) && method == "MBD") return(depthMBD(u,...))
  
  
  if(missing(X)) X = u
  if(is.data.frame(X)) X = as.matrix(X)
  if(is.vector(X)) X = matrix(X,ncol = 1)

  ###################################
  if (method=="Mahalanobis")
  {	
    return(depthMah(u, X, name = name, threads = threads, ...))      
  }
  ####################################
  if (method=="Euclidean")
  {
  		return(depthEuclid(u, X, name = name))
  }
  ####################################
  if(method == "Projection")
  {
    return(depthProjection(u, X, name = name, threads = threads, ...))
  }
  #######################################################################
  if (method=="Tukey")
  {
    return(depthTukey(u, X, name = name, ...))
  }
  ########################################################
  if (method=="LP")
  {
    return(depthLP(u, X, name = name, threads = threads,...))
  }
  if(method=="Local")
  {
    return(depthLocal(u, X, name = name, ...))
  }
  if(method == "MBD") return(depthMBD(u, X,...))
  
}

############################################
############################################
############################################

#'@title Euclidean Depth
#'@export
#'
#'@description Computes the euclidean depth of a point or vectors of points with respect to a multivariate data set.
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param name name for this data set - it will be used on plots from depthproc.
#'  @param \dots currently not supported.
#'
#'
#'@details 
#'
#'  Calculation of Euclidean depth is exact.
#'  
#'  Returns the depth of multivariate point \code{u} with respect to data set \code{X}.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  
#'  @examples
#'  x <- matrix(rnorm(9999), nc = 3)
#'  depthEuclid(x, x)
#'  
#'  
#'  
#'  @keywords
#'  multivariate
#'  nonparametric
#'  depth function
depthEuclid = function(u, X, name = "X",...)
{
  if(missing(X)) X = u
  n = dim(u)[1]
  center = colMeans(X)
  center = matrix(rep(center,n),nrow=n,byrow=TRUE)
  depth=1/(1+(rowSums((u-center)^2)))  
  
  new("DepthEuclid", depth, u = u, X = X, method = "Euclidean", name = name)
}


############################################
############################################
############################################

#'@title Mahalanobis Depth
#'@export
#'@description Computes the mahalanobis depth of a point or vectors of points with respect to a multivariate data set.
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param name name for this data set - it will be used on plots.
#'  @param threads number of threads used in parallel computations. Default value -1 means that all possible cores will be used.
#'  @param cov custom covariance matrix passed. If NULL standard calculations will be based on standard covariance estimator.
#'  @param mean custom mean vector. If null mean average will be used.
#'  @param \dots currently not supported.
#'
#'@details 
#'
#'  Calculation of Mahalanobis depth is exact.
#'  
#'  Returns the depth of multivariate point \code{u} with respect to data set \code{X}.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  
#'  @examples
#'  x <- matrix(rnorm(9999), nc = 3)
#'  depthMah(x, x)
#'  
#'  
#'  
#'  @keywords
#'  multivariate
#'  nonparametric
#'  depth function
depthMah = function(u, X, name = "X", cov = NULL, mean = NULL, threads = -1, ...)
{
  if(missing(X)) X = u
  if(!is.null(mean)) mean = matrix(mean, ncol = length(mean))
  
  depth = depthMahCPP(u,X, cov, mean, threads)
  new("DepthMahalanobis", depth, u = u, X = X, method = "Mahalanobis", name = name)
}


############################################
############################################
############################################

#'@title Projection Depth
#'@export
#'@description Computes the Projection depth of a point or vectors of points with respect to a multivariate data set.
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param ndir number of directions used in computations
#'  @param threads number of threads used in parallel computations. Default value -1 means that all possible cores will be used.
#'  @param name name for this data set - it will be used on plots from depthproc.
#'  @param \dots currently not supported.
#'
#'@details 
#'
#'  Irrespective of dimension, Projection and Tukey's depth is obtained by approximate calculation.
#'  
#'  Returns the depth of multivariate point \code{u} with respect to data set \code{X}.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  
#'  @examples
#'  x = matrix(rnorm(3000), nc = 3)
#'  a = depthProjection(x, x, ndir = 2000)
#'  
#'  
#'  @keywords
#'  multivariate
#'  nonparametric
#'  depth function
depthProjection = function(u, X, ndir = 1000, name = "X", threads = -1,...)
{
  if(missing(X)) X = u
  depth = depthProjCPP(u, X, ndir, threads)
  new("DepthProjection", depth, u = u, X = X, method = "Projection", name = name)
}

############################################
############################################
############################################

#'@title Tukey Depth
#'@export
#'@description Computes the Tukey depth of a point or vectors of points with respect to a multivariate data set.
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param ndir number of directions used in computations
#'  @param threads number of threads used in parallel computations. Default value -1 means that all possible cores will be used.
#'  @param name name for this data set - it will be used on plots from depthproc.
#'  @param exact if TRUE exact alhorithm will be used . Currently it works only for 2 dimensional data set.
#'  @param \dots currently not supported.
#'
#'@details 
#'
#'  Irrespective of dimension, Projection and Tukey's depth is obtained by approximate calculation.
#'  
#'  Returns the depth of multivariate point \code{u} with respect to data set \code{X}.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  
#'  @examples
#'  \dontrun{
#'  x = matrix(rnorm(3000), nc = 3)
#'  depthTukey(x, ndir = 2000)
#'  }
#'  
#'  # Exact algorithm in 2d
#'  x = matrix(rnorm(2000), nc = 2)
#'  depthTukey(x, exact = TRUE)
#'  
#'  @keywords
#'  multivariate
#'  nonparametric
#'  depth function
depthTukey = function(u, X, ndir = 1000, name = "X", threads = -1, exact = FALSE,...)
{
  if(missing(X)) X = u
  tukey1d = function(u,X)
  {
    Xecdf = ecdf(X)
    uecdf = Xecdf(u) 
    uecdf2 = 1-uecdf
    min.ecdf = uecdf>uecdf2
    depth = uecdf 
    depth[min.ecdf]=uecdf2[min.ecdf] 
    depth
  }
  
  if (ncol(X)==1)
  {
    depth= tukey1d(u,X)
  } else if(ncol(X) == 2 && exact)
  {
    depth = depthTukeyCPP(u,X, exact,threads)
  } else  # czyli jesli wymiar d>2
  {
    proj = t(runifsphere(ndir, ncol(X)))
    xut = X%*%proj
    uut = u%*%proj
    
    OD = matrix(nrow=nrow(uut),ncol=ncol(uut))
    
    for (i in 1:ndir)
    {
      
      OD[,i]=tukey1d(uut[,i],xut[,i])  
    }
    
    depth = apply(OD,1,min)
  }
  new("DepthTukey", depth, u = u, X = X, method = "Tukey", name = name)
}

#'@title LP Depth
#'@export
#'@description Computes the LP depth of a point or vectors of points with respect to a multivariate data set.
#'
#'  @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#'  @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#'  @param pdim dimension used in calculating depth function.
#'  @param la slope the weighing function.
#'  @param lb intercept in the weighing function.
#'  @param name name for this data set - it will be used on plots from depthproc.
#'  @param threads number of threads used in parallel computations. Default value -1 means that all possible cores will be used.
#'  @param func the weighing function. Currently it is not supported.
#'  @param \dots currently not supported.
#'
#'@details 
#'  
#'  Returns the depth of multivariate point \code{u} with respect to data set \code{X}.
#'  
#'  @author Daniel Kosiorowski, Mateusz Bocian, Anna Wegrzynkiewicz and Zygmunt Zawadzki from Cracow University of Economics.
#'  
#'  
#'  @examples
#'  x = matrix(rnorm(3000), ncol = 3)
#'  
#'  #Same results
#'  depthLP(x, x, ndir = 2000, pdim = 2) 
#'  
#'  @keywords
#'  multivariate
#'  nonparametric
#'  depth function
#'  
depthLP = function(u, X, pdim = 2, la = 1, lb = 1, name = "X", threads = -1, func = NULL,...)
{
  if(missing(X)) X = u
  if(is.null(func)) depth = depthLPCPP(u, X, pdim, la, lb, threads = threads)
  #norm = function(xi, z, p = 1) sum(abs(z-xi)^p)^(1/p)
  
  new("DepthLP", depth, u = u, X = X, method = "LP", name = name)
}


#'@title Modified band depth
#'@export
#'@description Computes the modified band depth.
#'
#' @param u Numerical vector or matrix whose depth is to be calculated. Dimension has to be the same as that of the observations.
#' @param X The data as a matrix, data frame or list. If it is a matrix or data frame, then each row is viewed as one multivariate observation. If it is a list, all components must be numerical vectors of equal length (coordinates of observations).
#' @param name for this data set - it will be used on plots from depthproc.
#' @param \dots currently not supported.
#'
#'@examples
#'
#'  x = matrix(rnorm(600), nc = 20)
#'  depthMBD(x)
#'  
depthMBD = function(u, X, name = "X",...)
{
  if(missing(X)) 
    {
      X = u
      depth = modBandDepth(u)
    } else
    {
      depth = modBandDepthRef(u,X)  
    }
    
  new("DepthMBD", as.numeric(depth), u = u, X = X, method = "MBD", name = name)
  

}

Try the DepthProc package in your browser

Any scripts or data that you put into this service are public.

DepthProc documentation built on May 2, 2019, 6:22 p.m.