Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
# Import the TIP library
library(tip)
# Import the US Arrests dataset
data(USArrests)
# Convert the data to a matrix
X <- data.matrix(USArrests)
# Compute the distance matrix
distance_matrix <- data.matrix(dist(X))
# Compute the temperature parameter estiamte
temperature <- 1/median(distance_matrix[upper.tri(distance_matrix)])
# For each subject, compute the point estimate for the number of similar
# subjects using univariate multiple change point detection (i.e.)
init_num_neighbors = get_cpt_neighbors(.distance_matrix = distance_matrix)
# Set the number of burn-in iterations in the Gibbs samlper
# RECOMENDATION: *** burn >= 1000 ***
burn <- 10
# Set the number of sampling iterations in the Gibbs sampler
# RECOMENDATION: *** samples >= 1000 ***
samples <- 10
# Extract the state names
names_subjects <- rownames(USArrests)
# Run TIP clustering using only the prior
# --> That is, the likelihood function is constant
tip1 <- tip(.data = data.matrix(X),
.burn = burn,
.samples = samples,
.similarity_matrix = exp(-1.0*temperature*distance_matrix),
.init_num_neighbors = init_num_neighbors,
.likelihood_model = "NIW",
.subject_names = names_subjects,
.num_cores = 1)
## -----------------------------------------------------------------------------
# Produce plots for the Bayesian Clustering Model
tip_plots <- plot(tip1)
## -----------------------------------------------------------------------------
# View the posterior distribution of the number of clusters
tip_plots$trace_plot_posterior_number_of_clusters
## -----------------------------------------------------------------------------
# View the trace plot with respect to the posterior number of clusters
tip_plots$trace_plot_posterior_number_of_clusters
## -----------------------------------------------------------------------------
# Extract posterior cluster assignments using the Posterior Expected Adjusted Rand (PEAR) index
cluster_assignments <- mcclust::maxpear(psm = tip1@posterior_similarity_matrix)$cl
# Create a list where each element stores the cluster assignments
cluster_assignment_list <- list()
for(k in 1:length(unique(cluster_assignments))){
cluster_assignment_list[[k]] <- names_subjects[which(cluster_assignments == k)]
}
cluster_assignment_list
## -----------------------------------------------------------------------------
# Create the one component graph with minimum entropy
partition_list <- partition_undirected_graph(.graph_matrix = tip1@posterior_similarity_matrix,
.num_components = 1,
.step_size = 0.001)
## -----------------------------------------------------------------------------
# View the state names
# names_subjects
# Create a vector of true region labels to see if there is a pattern
true_region <- c("Southeast", "West", "Southwest", "Southeast", "West", "West",
"Northeast", "Northeast", "Southeast", "Southeast", "West", "West",
"Midwest", "Midwest", "Midwest", "Midwest", "Southeast", "Southeast",
"Northeast", "Northeast", "Northeast", "Midwest", "Midwest", "Southeast",
"Midwest", "West", "Midwest", "West", "Northeast", "Northeast",
"Southwest", "Northeast", "Southeast", "Midwest", "Midwest", "Southwest",
"West", "Northeast", "Northeast", "Southeast", "Midwest", "Southeast",
"Southwest", "West", "Northeast", "Southeast", "West", "Southeast",
"Midwest", "West")
names_subjects
# Associate class labels and colors for the plot
class_palette_colors <- c("Northeast" = "blue",
"Southeast" = "red",
"Midwest" = "purple",
"Southwest" = "orange",
"West" = "green")
# Associate class labels and shapes for the plot
class_palette_shapes <- c("Northeast" = 15,
"Southeast" = 16,
"Midwest" = 17,
"Southwest" = 18,
"West" = 19)
# Visualize the posterior similarity matrix by constructing a graph plot of
# the one-cluster graph. The true labels are used here (below they are not).
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = names_subjects,
.subject_class_names = true_region,
.class_colors = class_palette_colors,
.class_shapes = class_palette_shapes,
.node_size = 2,
.add_node_labels = TRUE)
## -----------------------------------------------------------------------------
# Visualize the posterior similarity matrix by constructing a graph plot of
# the one-cluster graph. The true labels are used here (below they are not).
# Remove the subject names with .add_node_labels = FALSE
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = names_subjects,
.subject_class_names = true_region,
.class_colors = class_palette_colors,
.class_shapes = class_palette_shapes,
.node_size = 2,
.add_node_labels = FALSE)
## -----------------------------------------------------------------------------
# Construct a network plot without class labels
# Note: Subject labels may be suppressed using .add_node_labels = FALSE.
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = names_subjects,
.node_size = 2,
.add_node_labels = TRUE)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.