sampdp <- function(X, k, diss = c("euclidean", "mahalanobis", "correlation")) {
diss <- match.arg(diss)
X <- .matrix(X)
n <- dim(X)[1]
zn <- seq_len(n)
if(k > round(n / 2))
stop("\n\n Argument 'k' must be <= half of the data. \n\n")
if(diss == "euclidean")
D <- .dist(X)
else
D <- matdis(X, diss = diss)
colnames(D) <- rownames(D) <- zn
## initial 2 selections
## train
s1 <- which(D == max(D), arr.ind = TRUE)[1, ]
zD <- D
zD[s1, ] <- zD[, s1] <- NA
## test
s2 <- which(D == max(zD, na.rm = TRUE), arr.ind = TRUE)[1, ]
## candidates
cand <- seq_len(n)[-c(s1, s2)]
for(i in seq_len(k - 2)) {
zD <- D[s1, cand, drop = TRUE]
u <- matrixStats::colMins(zD)
zs <- cand[which(u == max(u))[1]]
s1 <- c(s1, zs)
cand <- zn[-c(s1, s2)]
zD <- D[s2, cand, drop = TRUE]
u <- matrixStats::colMins(zD)
zs <- cand[which(u == max(u))[1]]
s2 <- c(s2, zs)
cand <- zn[-c(s1, s2)]
}
names(s2) <- names(s1) <- NULL
list(train = s1, test = s2, remain = cand)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.