inst/doc/hopkins_optimization.R

## ---- eval=FALSE--------------------------------------------------------------
#  for (i in 1:n) {
#    distp[1] <- dist(rbind(p[i, ], data[1, ]))
#    minqi <- dist(rbind(q[i, ], data[1, ]))
#    for (j in 2:nrow(data)) {
#      distp[j] <- dist(rbind(p[i, ], data[j, ]))
#      error <- q[i, ] - data[j, ]
#      if (sum(abs(error)) != 0) {
#        distq <- dist(rbind(q[i, ], data[j, ]))
#        if (distq < minqi)
#          minqi <- distq
#      }
#    }

## ---- eval=FALSE--------------------------------------------------------------
#  DistVecToMat <- function(x,Y){
#    min(apply(Y, 1, function(yi) sqrt(sum((x-yi)^2))))
#  }
#  # DistVecToMat(c(1,2), matrix(c(4,5,6,7), nrow=2, byrow=TRUE))
#  # 4.242641 7.071068 # sqrt(c(18,50))
#  
#  # For each ith row of sample, calculate the distance of:
#  #   U[i,] to X
#  #   W[i,] to X[-k[i] , ], i.e. omitting W[i,] from X
#  dux <- rep(NA, n)
#  dwx <- rep(NA, n)
#  for(i in 1:n) {
#    dux[i] <- DistVecToMat(U[i,], X)
#    dwx[i] <- DistVecToMat(W[i,], X[-k[i],])
#  }

## ---- eval=FALSE--------------------------------------------------------------
#  # pdist(W,X) computes distance between rows of W and rows of X
#  tmp <- as.matrix(pdist(W,X))
#  dwx <- apply(tmp, 1, min)
#  
#  # pdist(U,X) computes distance between rows of U and rows of X
#  tmp <- as.matrix(pdist(U,X))
#  dux <- apply(tmp, 1, min)

## ---- eval=FALSE--------------------------------------------------------------
#  library(microbenchmark)
#  
#  # Method 1. Loop over vector 1:n
#  # for(i in 1:m) dwx[i,k[i]] <- Inf
#  microbenchmark(hopkins(X12, 500))
#  ## Unit: milliseconds
#  ##                expr     min      lq     mean  median       uq      max neval
#  ##  hopkins(X12, 500) 38.9493 42.8045 50.73876 45.0668 47.69945 120.9366   100
#  
#  # Method 2. Build a matrix of indexes to the cells that need changing
#  # dwx[ matrix( c(1:n, k), nrow=n) ] <- Inf
#  microbenchmark(hopkins(X12, 500))
#  ## Unit: milliseconds
#  ##                expr     min       lq     mean   median      uq      max neval
#  ##  hopkins(X12, 500) 39.2668 42.41565 50.21628 43.74215 46.9462 126.3522   100

## ---- eval=FALSE, echo=FALSE--------------------------------------------------
#  set.seed(1)
#  X1 <- matrix(rnorm(1000, mean=0, sd=1), ncol=2)
#  X2 <- matrix(rnorm(1000, mean=5, sd=1), ncol=2)
#  X12 <- rbind(X1, X2)
#  plot(X12)
#  
#  # Old version
#  set.seed(42)
#  system.time( hop1 <- clustertend::hopkins(X12, 500) ) # 17 sec
#  1-hop1$H
#  # .8994528
#  
#  # New version
#  set.seed(42)
#  system.time( hop2 <- hopkins::hopkins(X12, 500, d=1) ) # .05 sec
#  hop2
#  # .9054555

Try the hopkins package in your browser

Any scripts or data that you put into this service are public.

hopkins documentation built on Aug. 20, 2023, 9:08 a.m.