Nothing
assign_by_seed <- function(seeds, nng) {
cl_label <- rep(as.integer(NA), ncol(nng))
current_label <- 0
invisible(lapply(seeds, function(i) {
cl_label[nng[, i]] <<- current_label
current_label <<- current_label + 1
}))
cl_label
}
match_n_assign <- function(cl_label,
match,
unassigned,
radius,
distances) {
if (!is.null(radius)) {
unassigned[unassigned] <- (apply(distances[unassigned, match, drop = FALSE], 1, sort)[1, ] <= radius)
}
if (sum(unassigned) > 0) {
cl_label[unassigned] <- cl_label[match[apply(distances[unassigned, match, drop = FALSE], 1, order)[1, ]]]
}
cl_label
}
any_neighbor_or_closest_assigned <- function(unassigned_method,
unassigned,
nng,
assigned,
radius,
cl_label,
distances) {
if (unassigned_method == "any_neighbor") {
unassigned <- apply(nng, 2, any) & unassigned
}
if (any(unassigned)) {
cl_label <- match_n_assign(cl_label, which(assigned), unassigned, radius, distances)
}
cl_label
}
assign_unassigned <- function(distances,
cl_label,
seeds,
nng,
unassigned_method,
radius,
primary_data_points,
secondary_unassigned_method,
secondary_radius) {
assigned <- !is.na(cl_label)
unassigned <- !assigned
if (!is.null(primary_data_points)) {
unassigned <- unassigned & primary_data_points
}
if (any(unassigned)) {
if (unassigned_method == "ignore") {
# nothing
} else if (unassigned_method == "any_neighbor" || unassigned_method == "closest_assigned") {
cl_label <- any_neighbor_or_closest_assigned(unassigned_method,
unassigned,
nng,
assigned,
radius,
cl_label,
distances)
} else if (unassigned_method == "closest_seed") {
cl_label <- match_n_assign(cl_label, seeds, unassigned, radius, distances)
} else {
stop("Unknown options.")
}
}
unassigned <- is.na(cl_label)
if (any(unassigned)) {
if (secondary_unassigned_method == "ignore") {
# nothing
} else if (secondary_unassigned_method == "closest_assigned") {
cl_label <- match_n_assign(cl_label, which(assigned), unassigned, secondary_radius, distances)
} else if (secondary_unassigned_method == "closest_seed") {
cl_label <- match_n_assign(cl_label, seeds, unassigned, secondary_radius, distances)
} else {
stop("Unknown options.")
}
}
cl_label
}
est_average_seed_dist <- function(distances,
nng,
seeds) {
if (length(seeds) > 1000) stop("Too many seeds for replica.")
diag(nng) <- FALSE
mean(unlist(lapply(seeds, function(i) {
mean(distances[i, nng[, i]])
})))
}
replica_nng_clustering <- function(distances,
size_constraint,
seed_method = "exclusion_updating",
unassigned_method = "closest_seed",
radius = NULL,
primary_data_points = NULL,
secondary_unassigned_method = "ignore",
secondary_radius = NULL) {
ensure_distances(distances)
num_data_points <- length(distances)
size_constraint <- coerce_size_constraint(size_constraint, num_data_points)
seed_method <- coerce_args(seed_method, all_seed_methods)
unassigned_method <- coerce_args(unassigned_method,
c("ignore",
"any_neighbor",
"closest_assigned",
"closest_seed",
"estimated_radius_closest_seed"))
radius <- coerce_radius(radius)
if (is.null(primary_data_points)) {
secondary_unassigned_method <- "ignore"
} else {
ensure_indicators(primary_data_points, num_data_points, TRUE)
}
secondary_unassigned_method <- coerce_args(secondary_unassigned_method,
c("ignore",
"closest_assigned",
"closest_seed",
"estimated_radius_closest_seed"))
secondary_radius <- coerce_radius(secondary_radius)
distances <- as.matrix(distances)
nng <- get_simple_nng(distances,
size_constraint,
radius,
primary_data_points)
seeds <- findseeds(nng, seed_method)
cl_label <- assign_by_seed(seeds, nng)
if (unassigned_method == "estimated_radius_closest_seed") {
unassigned_method <- "closest_seed"
radius <- est_average_seed_dist(distances, nng, seeds)
}
if (secondary_unassigned_method == "estimated_radius_closest_seed") {
secondary_unassigned_method <- "closest_seed"
secondary_radius <- est_average_seed_dist(distances, nng, seeds)
}
cl_label <- assign_unassigned(distances,
cl_label,
seeds,
nng,
unassigned_method,
radius,
primary_data_points,
secondary_unassigned_method,
secondary_radius)
make_scclust(as.integer(cl_label),
length(unique(cl_label[!is.na(cl_label)])),
attr(distances, "ids", exact = TRUE))
}
replica_nng_clustering_types <- function(distances,
type_labels,
type_size_constraints,
total_size_constraint = NULL,
seed_method = "exclusion_updating",
unassigned_method = "closest_seed",
radius = NULL,
primary_data_points = NULL,
secondary_unassigned_method = "ignore",
secondary_radius = NULL) {
ensure_distances(distances)
num_data_points <- length(distances)
type_labels <- coerce_type_labels(type_labels, num_data_points)
type_size_constraints <- coerce_type_constraints(type_size_constraints)
type_size_constraints <- make_type_size_constraints(type_size_constraints,
type_labels)
total_size_constraint <- coerce_total_size_constraint(total_size_constraint,
type_size_constraints,
num_data_points)
seed_method <- coerce_args(seed_method, all_seed_methods)
unassigned_method <- coerce_args(unassigned_method,
c("ignore",
"any_neighbor",
"closest_assigned",
"closest_seed",
"estimated_radius_closest_seed"))
radius <- coerce_radius(radius)
if (is.null(primary_data_points)) {
secondary_unassigned_method <- "ignore"
} else {
ensure_indicators(primary_data_points, num_data_points, TRUE)
}
secondary_unassigned_method <- coerce_args(secondary_unassigned_method,
c("ignore",
"closest_assigned",
"closest_seed",
"estimated_radius_closest_seed"))
secondary_radius <- coerce_radius(secondary_radius)
distances <- as.matrix(distances)
nng <- get_type_nng(distances,
type_labels,
type_size_constraints,
total_size_constraint,
radius,
primary_data_points)
seeds <- findseeds(nng, seed_method)
cl_label <- assign_by_seed(seeds, nng)
if (unassigned_method == "estimated_radius_closest_seed") {
unassigned_method <- "closest_seed"
radius <- est_average_seed_dist(distances, nng, seeds)
}
if (secondary_unassigned_method == "estimated_radius_closest_seed") {
secondary_unassigned_method <- "closest_seed"
secondary_radius <- est_average_seed_dist(distances, nng, seeds)
}
cl_label <- assign_unassigned(distances,
cl_label,
seeds,
nng,
unassigned_method,
radius,
primary_data_points,
secondary_unassigned_method,
secondary_radius)
make_scclust(as.integer(cl_label),
length(unique(cl_label[!is.na(cl_label)])),
attr(distances, "ids", exact = TRUE))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.