nTARP_documentation.R
In nTARP: Cluster Analysis Using Thresholding After Random Projections (n-TARP)

## -----------------------------------------------------------------------------
library(nTARP)

## -----------------------------------------------------------------------------
# Load the built-in iris data set and preview its first rows.
data("iris")
head(iris)

## -----------------------------------------------------------------------------
# Count how many observations carry each species label.
table(iris[["Species"]])

## -----------------------------------------------------------------------------
# Seed the RNG so every run of this section gives the same result.
set.seed(123532)
# Run nTARP on the four numeric iris columns (column 5, Species, is excluded).
cluster_solution <- nTARP(
  data = iris[, 1:4],
  number_of_projections = 1000,
  withinss_threshold = 0.36
)

## -----------------------------------------------------------------------------
# Show the optimal solution and its within sum of squares, rounded to 2 digits.
cluster_solution$OptimalSolution
paste(
  "The optimal within sum of squares was",
  round(cluster_solution$OptimalWithinss, 2)
)

## -----------------------------------------------------------------------------
# Display the projection, threshold and direction, each under its own caption.
print("This is the random vector associated with the best cluster.")
cluster_solution$OptimalProjection
print("This is the boundary where the clusters transition.")
cluster_solution$Threshold
print("The *direction* tells us which cluster has the larger mean, so in this case, points with values larger than the threshold should be assigned to cluster 2.")
cluster_solution$Direction

## -----------------------------------------------------------------------------
# Take the first iris observation (numeric columns only) and reduce it to a
# single value: element-wise product with the optimal projection, then summed.
observation_to_classify <- iris[1, 1:4]
observation_after_projection <- sum(
  observation_to_classify * cluster_solution$OptimalProjection
)
print("The resulting 1-D projection is...")
observation_after_projection
# NOTE(review): the sentence below is fixed narrative text; the code does not
# actually compare the projected value with the threshold before asserting it.
paste0(
  "Our direction, ",
  cluster_solution$Direction,
  ", tells us that cluster 2 has a mean bigger than the threshold, so since the projected value ",
  round(observation_after_projection, 2),
  " is larger than the threshold ",
  round(cluster_solution$Threshold, 2),
  " we should assign it to cluster 2."
)

## -----------------------------------------------------------------------------
# Plot the distribution of the within-sum-of-squares values stored in the fit.
hist(cluster_solution$AllWithinss)

## -----------------------------------------------------------------------------
# Pull out the first two entries of Clusterings as one example solution.
one_feasible_solution <- list(
  cluster_solution$Clusterings[[1]],
  cluster_solution$Clusterings[[2]]
)
one_feasible_solution

## -----------------------------------------------------------------------------
# Half the number of stored entries; presumably each solution occupies two
# consecutive entries of Clusterings, as the pair above suggests - TODO confirm.
length(cluster_solution$Clusterings) / 2

## -----------------------------------------------------------------------------
# Re-seed the RNG so the bisecting run is reproducible.
set.seed(123532)
# Bisecting variant on the same four numeric iris columns.
cluster_solution_bisecting <- nTARP_bisecting(
  data = iris[, 1:4],
  number_of_projections = 1000,
  withinss_threshold = 0.36,
  minimum_cluster_size_percent = 20 # default value
)

## -----------------------------------------------------------------------------
# Inspect the clusters the bisecting procedure kept.
cluster_solution_bisecting$BestClusters

## -----------------------------------------------------------------------------
# Build the per-observation cluster table from the bisecting result, using the
# row positions of iris as ids. seq_len() replaces 1:n so that an empty input
# would give an empty id vector instead of c(1, 0).
labeled_clusters <- build_solution_from_labeled_clusters(
  nTARP_best_clusters = cluster_solution_bisecting$BestClusters,
  ids = seq_len(nrow(iris))
)
# Show a random sample of 10 observations. The row pool is sized from the
# table itself rather than a hard-coded 150, so it stays valid if the input
# data changes; for iris the rows drawn are the same as before.
labeled_clusters[sample(seq_len(nrow(labeled_clusters)), 10, replace = FALSE), ]

## -----------------------------------------------------------------------------
# Cross-tabulate the final cluster ids against the known species labels.
table(labeled_clusters$FinalClusterID, iris$Species)

## -----------------------------------------------------------------------------
# First merge: combine clusters 1 and 2, then re-check against species.
labeled_clusters <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters$FinalClusterID, iris$Species)

## -----------------------------------------------------------------------------
# Two further merges of the clusters currently labelled 1 and 2; only the
# final state is tabulated.
labeled_clusters <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
labeled_clusters <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters$FinalClusterID, iris$Species)

## -----------------------------------------------------------------------------
# Same seed as the earlier runs, for reproducibility.
set.seed(123532)
# Bisecting run that additionally receives the species labels as a
# contextual variable.
cluster_solution_bisecting_contextual <- nTARP_bisecting(
  data = iris[, 1:4],
  number_of_projections = 1000,
  withinss_threshold = 0.36,
  minimum_cluster_size_percent = 20,
  contextual_variable = iris$Species
)
# Cluster table keyed by the row positions of iris.
labeled_clusters <- build_solution_from_labeled_clusters(
  nTARP_best_clusters = cluster_solution_bisecting_contextual$BestClusters,
  ids = seq_len(nrow(iris))
)

## -----------------------------------------------------------------------------
# Same table, this time also passing the full iris data frame as
# contextual variables; preview the first rows.
labeled_clusters_with_context <- build_solution_from_labeled_clusters(
  nTARP_best_clusters = cluster_solution_bisecting_contextual$BestClusters,
  ids = seq_len(nrow(iris)),
  contextual_variables_df = iris
)
head(labeled_clusters_with_context)

## -----------------------------------------------------------------------------
# Compare final cluster ids with the species labels.
table(labeled_clusters$FinalClusterID, iris$Species)

## -----------------------------------------------------------------------------
# Merge clusters 1 and 2 once and compare again.
labeled_clusters <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters$FinalClusterID, iris$Species)

## -----------------------------------------------------------------------------
# Attach HDclassif (presumably the source of the wine data loaded next).
library(HDclassif)
data("wine")
head(wine)
# Standardise columns 2 to 14 in place with scale()'s defaults (centre, then
# divide by the standard deviation). Column 1 is left unchanged.
wine[, 2:14] <- scale(wine[, 2:14])

## -----------------------------------------------------------------------------
# Count the observations in each wine class.
table(wine[["class"]])

## -----------------------------------------------------------------------------
# Seed the RNG for a reproducible wine run.
set.seed(123532)
# Bisecting nTARP on the scaled wine columns, with 100^2 projections.
cluster_solution_bisecting_wine <- nTARP_bisecting(
  data = wine[, 2:14],
  number_of_projections = 100^2,
  withinss_threshold = 0.36,
  minimum_cluster_size_percent = 20 # default value
)
# Build the per-observation cluster table and compare it with the wine classes.
labeled_clusters_wine <- build_solution_from_labeled_clusters(
  nTARP_best_clusters = cluster_solution_bisecting_wine$BestClusters,
  ids = seq_len(nrow(wine))
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

## -----------------------------------------------------------------------------
# Three successive merges of the clusters currently labelled 1 and 2, with the
# cluster-by-class table shown after each one.

# Merge 1 of 3.
print("This is the result of combining clusters 1 and 2, during which 3-6 were relabled to be 1-4 with the merged cluster being labeled 5.")
labeled_clusters_wine <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters_wine,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

# Merge 2 of 3.
print("This is the result of combining clusters 3 and 4 (that were relabeled 1 and 2 in the last step). Now there are 4 clusters left.")
labeled_clusters_wine <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters_wine,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

# Merge 3 of 3.
print("We combine that last two clusters, giving us 3 clusters at the end.")
labeled_clusters_wine <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters_wine,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

## -----------------------------------------------------------------------------
# Seed the RNG for a reproducible run.
set.seed(123532)
# Repeat the wine bisecting run, now passing the class labels as the
# contextual variable. This overwrites the earlier wine result objects.
cluster_solution_bisecting_wine <- nTARP_bisecting(
  data = wine[, 2:14],
  number_of_projections = 100^2,
  withinss_threshold = 0.36,
  minimum_cluster_size_percent = 20,
  contextual_variable = wine$class
)
labeled_clusters_wine <- build_solution_from_labeled_clusters(
  nTARP_best_clusters = cluster_solution_bisecting_wine$BestClusters,
  ids = seq_len(nrow(wine))
)
# Compare final cluster ids with the wine classes.
table(labeled_clusters_wine$FinalClusterID, wine$class)

## -----------------------------------------------------------------------------
# Two successive merges of the clusters currently labelled 1 and 2, with the
# cluster-by-class table shown after each one.

# Merge 1 of 2.
print("This is the result of combining clusters 1 and 2, during which 3-5 were relabled to be 1-3 with the merged cluster being labeled 4.")
labeled_clusters_wine <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters_wine,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

# Merge 2 of 2.
print("We combine that last two clusters, giving us 3 clusters at the end.")
labeled_clusters_wine <- consolidate_clusters(
  cluster_path_matrix = labeled_clusters_wine,
  first_cluster_to_combine = 1,
  second_cluster_to_combine = 2
)
table(labeled_clusters_wine$FinalClusterID, wine$class)

Any scripts or data that you put into this service are public.

nTARP documentation built on March 20, 2026, 5:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

nTARP
Cluster Analysis Using Thresholding After Random Projections (n-TARP)

inst/doc/nTARP_documentation.R
In nTARP: Cluster Analysis Using Thresholding After Random Projections (n-TARP)

Try the nTARP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

nTARP Cluster Analysis Using Thresholding After Random Projections (n-TARP)

inst/doc/nTARP_documentation.R In nTARP: Cluster Analysis Using Thresholding After Random Projections (n-TARP)

Try the nTARP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

nTARP
Cluster Analysis Using Thresholding After Random Projections (n-TARP)

inst/doc/nTARP_documentation.R
In nTARP: Cluster Analysis Using Thresholding After Random Projections (n-TARP)