R/pearson.dist.R

##' Distance based on Pearson's \eqn{R^2}{R squared}
##' 
##' The calculated distance is
##' \eqn{D^2 = \frac{1 - COR (\code{x}')}{2}}{D^2 = (1 - COR (x')) / 2}
##' 
##' The distance between the rows of \code{x} is calculated.  The possible
##' values range from 0 (prefectly correlated) over 0.5 (uncorrelated) to 1
##' (perfectly anti-correlated).
##' 
##' @param x a matrix
##' @return distance matrix (distance object)
##' @author C. Beleites
##' @seealso \code{\link[stats]{as.dist}}
##' @references S. Theodoridis and K. Koutroumbas: Pattern Recognition, 3rd ed., p. 495
##' @keywords cluster
##' @export
##' @examples
##' 
##' pearson.dist (flu [[]])
##' pearson.dist (flu)
pearson.dist <- function (x) {
	
  x <- as.matrix (x)
  
  ## center & scale *row*s
  ## (n - 1) factor cancels out between variance scaling and calculating correlation
  x <- x - rowMeans (x)
  x <- x / sqrt (rowSums (x^2)) 
  
  if (hy.getOption("gc")) gc ()
  x <-  tcrossprod (x) 
  
  ## keep only lower triagonal
  if (hy.getOption("gc")) gc ()
  x <- as.dist (x)

  if (hy.getOption("gc")) gc ()
  0.5 - x / 2
}

.test (pearson.dist) <- function (){
	checkEqualsNumeric (
		pearson.dist (flu),
		as.dist (0.5 - cor (t (as.matrix (flu))) / 2))
}

## benchmark
function (){
  m <- sample (chondro, 10000) [[]]
  microbenchmark (
    cor = as.dist (0.5 - cor (t (as.matrix (m))) / 2),
    tcross = pearson.dist (m),
    times = 10L
  )
}

Try the hyperSpec package in your browser

Any scripts or data that you put into this service are public.

hyperSpec documentation built on May 2, 2019, 5:45 p.m.