tests/testthat/generateReference.R

# This code generates expected results from the initial reference implementation 
# (i.e. https://github.com/thomasp85/densityClust/commit/b038fb30ea6f59d60a3a4b45eaa3ac9a504951f6)
# It is called by the testing code to compare the old results with the new.

set.seed(123)
dists <- list(
   dist(matrix(rnorm(1000), ncol = 4)),
   dist(matrix(rnorm(1000), ncol = 20)),
   dist(matrix(rnorm(10000), ncol = 40)),
   dist(matrix(sample(1:100000, 1000), ncol = 4)),
   dist(matrix(sample(1:100000, 1000), ncol = 20)),
   dist(matrix(sample(1:100000, 1000), ncol = 50))
)

referenceImplementation <- function(distance, dc, gaussian=FALSE) {
   if(missing(dc)) {
      dc <- reference_estimateDc(distance)
   }
   rho <- reference_localDensity(distance, dc, gaussian=gaussian)
   delta <- reference_distanceToPeak(distance, rho)
   res <- list(rho=rho, delta=delta, distance=distance, dc=dc, threshold=c(rho=NA, delta=NA), peaks=NA, clusters=NA, halo=NA, 
               knn_graph = NA, nearest_higher_density_neighbor = NA, 
               nn.index = NA, nn.dist = NA)
   class(res) <- 'densityCluster'
   res
}

reference_estimateDc <- function(distance, neighborRateLow=0.01, neighborRateHigh=0.02) {
   comb <- as.matrix(distance)
   size <- attr(distance, 'Size')
   dc <- min(distance)
   dcMod <- as.numeric(summary(distance)['Median']*0.01)
   while(TRUE) {
      neighborRate <- mean((apply(comb < dc, 1, sum)-1)/size)
      if(neighborRate > neighborRateLow && neighborRate < neighborRateHigh) break
      if(neighborRate > neighborRateHigh) {
         dc <- dc - dcMod
         dcMod <- dcMod/2
      }
      dc <- dc + dcMod
   }
   cat('Distance cutoff calculated to', dc, '\n')
   dc
}

reference_distanceToPeak <- function(distance, rho) {
   comb <- as.matrix(distance)
   res <- sapply(1:length(rho), function(i) {
      peaks <- comb[rho>rho[i], i]
      if(length(peaks) == 0) {
         max(comb[,i])
      } else {
         min(peaks)
      }
   })
   names(res) <- names(rho)
   res
}

reference_localDensity <- function(distance, dc, gaussian=FALSE) {
   comb <- as.matrix(distance)
   if(gaussian) {
      res <- apply(exp(-(comb/dc)^2), 1, sum)-1
   } else {
      res <- apply(comb < dc, 1, sum)-1
   }
   if(is.null(attr(distance, 'Labels'))) {
      names(res) <- NULL
   } else {
      names(res) <- attr(distance, 'Labels')
   }
   res
}

# Because the new implementation of estimateDc does not maintain equality with
# the previous implementation, calculate the cutoffs using the new 
# implementation. Then pass the calculated cutoffs into the reference 
# implementation and the new implementation to test that the rest of the 
# implementations are the same. 
dcs <- lapply(dists, estimateDc)

# Reference DCs for comparison 
referenceDcs <- lapply(dists, reference_estimateDc)

# non-Gaussian
densityClustReference <- Map(referenceImplementation, dists, dcs)

# convenient for debugging, but calling non-exported functions not allowed in CRAN
# localDensityReference <- Map(reference_localDensity, dists, dcs)
# 
# distanceToPeakReference <- Map(reference_distanceToPeak, dists, localDensityReference)

# Gaussian
gaussianDensityClustReference <- Map(referenceImplementation, dists, dcs, TRUE) 

# convenient for debugging, but calling non-exported functions not allowed in CRAN
# gaussianLocalDensityReference <- Map(f = function(x, y) reference_localDensity(x, y, gaussian = TRUE), dists, estimateDcReference)
thomasp85/densityClust documentation built on Feb. 7, 2024, 7:20 a.m.