# This code generates expected results from the initial reference implementation
# (i.e. https://github.com/thomasp85/densityClust/commit/b038fb30ea6f59d60a3a4b45eaa3ac9a504951f6)
# It is called by the testing code to compare the old results with the new.
set.seed(123)
dists <- list(
dist(matrix(rnorm(1000), ncol = 4)),
dist(matrix(rnorm(1000), ncol = 20)),
dist(matrix(rnorm(10000), ncol = 40)),
dist(matrix(sample(1:100000, 1000), ncol = 4)),
dist(matrix(sample(1:100000, 1000), ncol = 20)),
dist(matrix(sample(1:100000, 1000), ncol = 50))
)
referenceImplementation <- function(distance, dc, gaussian=FALSE) {
if(missing(dc)) {
dc <- reference_estimateDc(distance)
}
rho <- reference_localDensity(distance, dc, gaussian=gaussian)
delta <- reference_distanceToPeak(distance, rho)
res <- list(rho=rho, delta=delta, distance=distance, dc=dc, threshold=c(rho=NA, delta=NA), peaks=NA, clusters=NA, halo=NA,
knn_graph = NA, nearest_higher_density_neighbor = NA,
nn.index = NA, nn.dist = NA)
class(res) <- 'densityCluster'
res
}
reference_estimateDc <- function(distance, neighborRateLow=0.01, neighborRateHigh=0.02) {
comb <- as.matrix(distance)
size <- attr(distance, 'Size')
dc <- min(distance)
dcMod <- as.numeric(summary(distance)['Median']*0.01)
while(TRUE) {
neighborRate <- mean((apply(comb < dc, 1, sum)-1)/size)
if(neighborRate > neighborRateLow && neighborRate < neighborRateHigh) break
if(neighborRate > neighborRateHigh) {
dc <- dc - dcMod
dcMod <- dcMod/2
}
dc <- dc + dcMod
}
cat('Distance cutoff calculated to', dc, '\n')
dc
}
reference_distanceToPeak <- function(distance, rho) {
comb <- as.matrix(distance)
res <- sapply(1:length(rho), function(i) {
peaks <- comb[rho>rho[i], i]
if(length(peaks) == 0) {
max(comb[,i])
} else {
min(peaks)
}
})
names(res) <- names(rho)
res
}
reference_localDensity <- function(distance, dc, gaussian=FALSE) {
comb <- as.matrix(distance)
if(gaussian) {
res <- apply(exp(-(comb/dc)^2), 1, sum)-1
} else {
res <- apply(comb < dc, 1, sum)-1
}
if(is.null(attr(distance, 'Labels'))) {
names(res) <- NULL
} else {
names(res) <- attr(distance, 'Labels')
}
res
}
# Because the new implementation of estimateDc does not maintain equality with
# the previous implementation, calculate the cutoffs using the new
# implementation. Then pass the calculated cutoffs into the reference
# implementation and the new implementation to test that the rest of the
# implementations are the same.
dcs <- lapply(dists, estimateDc)
# Reference DCs for comparison
referenceDcs <- lapply(dists, reference_estimateDc)
# non-Gaussian
densityClustReference <- Map(referenceImplementation, dists, dcs)
# convenient for debugging, but calling non-exported functions not allowed in CRAN
# localDensityReference <- Map(reference_localDensity, dists, dcs)
#
# distanceToPeakReference <- Map(reference_distanceToPeak, dists, localDensityReference)
# Gaussian
gaussianDensityClustReference <- Map(referenceImplementation, dists, dcs, TRUE)
# convenient for debugging, but calling non-exported functions not allowed in CRAN
# gaussianLocalDensityReference <- Map(f = function(x, y) reference_localDensity(x, y, gaussian = TRUE), dists, estimateDcReference)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.