R/pearson.dist.R

##' Distance based on Pearson's \eqn{R^2}{R squared}
##'
##' The calculated distance is
##' \eqn{D^2 = \frac{1 - COR (\code{x}')}{2}}{D^2 = (1 - COR (x')) / 2}
##'
##' The distance between the rows of \code{x} is calculated.  The possible
##' values range from 0 (prefectly correlated) over 0.5 (uncorrelated) to 1
##' (perfectly anti-correlated).
##'
##' @param x a matrix
##' @return distance matrix (distance object)
##' @author C. Beleites
##' @seealso \code{\link[stats]{as.dist}}
##' @references S. Theodoridis and K. Koutroumbas: Pattern Recognition, 3rd ed., p. 495
##' @keywords cluster
##' @export
##' @examples
##'
##' pearson.dist (flu [[]])
##' pearson.dist (flu)
pearson.dist <- function (x) {

  x <- as.matrix (x)

  ## center & scale *row*s
  ## (n - 1) factor cancels out between variance scaling and calculating correlation
  x <- x - rowMeans (x)
  x <- x / sqrt (rowSums (x^2))

  if (hy.getOption("gc")) gc ()
  x <-  tcrossprod (x)

  ## keep only lower triagonal
  if (hy.getOption("gc")) gc ()
  x <- as.dist (x)

  if (hy.getOption("gc")) gc ()
  0.5 - x / 2
}

##' @include unittest.R
.test (pearson.dist) <- function (){
  context ("pearson.dist")

  test_that("pearson.dist against manual calculation", {
    expect_equivalent (
      pearson.dist (flu),
      as.dist (0.5 - cor (t (as.matrix (flu))) / 2))
  })
}

## benchmark
# function (){
#   m <- sample (chondro, 10000) [[]]
#   microbenchmark (
#     cor = as.dist (0.5 - cor (t (as.matrix (m))) / 2),
#     tcross = pearson.dist (m),
#     times = 10L
#   )
# }

Try the hyperSpec package in your browser

Any scripts or data that you put into this service are public.

hyperSpec documentation built on Sept. 13, 2021, 5:09 p.m.