The following package was created by John Jewell as part of an Undergraduate Dissertation at Western University under the supervision of Professor Camila de Souza. The package clusters functional data using variational inference. More details regarding the functionality of the package are available in the usage and examples sections.
You can install the released version of funclustVI from CRAN with:
# Install the released version of funclustVI from CRAN
install.packages("funclustVI")
And the development version from GitHub with:
# Install devtools only when it is not already available, so that an
# existing installation is not needlessly reinstalled on every run
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# Install the development version of funclustVI from GitHub
devtools::install_github("jewelltaylor/funclustVI")
The funclustVI package offers two main functions: funclustVI and simulate.
The funclustVI function clusters functional data using a novel variational inference based approach and returns a fitted model.
The funclustVI function returns a list with the following entries:
The simulate function generates data, fits a model, and evaluates the model's predictions over a set number of iterations, all using custom data generation, modelling, and evaluation parameters specified by the user.
The simulate function returns a list with the following entries:
This is an example which shows you how to use the package to generate cluster assignments from functional data. Refer to the usage section above for detailed requirements.
# Import the funclustVI package
library(funclustVI)
# Data parameters: evaluation grid and number of curves per cluster
x = seq(from = 0, to = pi / 3, length.out = 100)
curves_per_cluster = 10
data_params = list()
data_params$x = x
data_params$curves_per_cluster = curves_per_cluster
# Generate the curves with the Case_7 data generator
Y = Case_7(data_params)
K = 3
# Curves are labelled cluster by cluster: 1, ..., 1, 2, ..., 2, 3, ..., 3
true_cluster_assignments = rep(1:K, each = curves_per_cluster)
# Model parameters
init = "km"
nbasis = 6
convergence_threshold = 1
max_iterations = 10
# 2 x K configuration matrix (presumably one column per cluster); both rows
# are built from K so they stay the right length if K is changed
gamma_dist_config_matrix = matrix(0, 2, K)
gamma_dist_config_matrix[1, ] = rep(78.125, K) * 100
gamma_dist_config_matrix[2, ] = rep(12.5, K) * 100
verbose = FALSE
draw = FALSE
# Plot parameters (draw is FALSE in this example)
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(1, 6)
plot_params$show_curves = FALSE
plot_params$title = NULL
# Fit the model
model = funclustVI(
  x, Y, K, true_cluster_assignments, init, nbasis, convergence_threshold,
  max_iterations, gamma_dist_config_matrix, verbose, draw, plot_params
)
# Get the cluster assignments
cluster_assignments = model$cluster_assignments
print(cluster_assignments)
#> [1] 1 1 1 2 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2
This is an example which shows how to use the funclustVI package to cluster real data included in the package. The data corresponds to the daily temperatures in Vancouver and Toronto over the month of July over the 10-year period 1994 to 2003. Since draw = TRUE, a plot is generated showing the true function vs. the estimated function for each cluster. Since show_curves is TRUE, the plot includes the actual curves in differing colors. Refer to the usage section above for detailed requirements.
# Load the package so this example can be run on its own
library(funclustVI)
# Data parameters: one observation per day of the month
x = 1:31
# `dataset` is the real data referred to in the description
# (presumably shipped with the package -- confirm)
Y = dataset
K = 2
# NOTE(review): the description mentions a 10-year period, yet 9 curves per
# cluster are used here -- confirm against the dataset
curves_per_cluster = 9
true_cluster_assignments = rep(1:K, each = curves_per_cluster)
# Model parameters
init = "km"
nbasis = 10
convergence_threshold = 1
max_iterations = 10
gamma_dist_config_matrix = NULL
verbose = FALSE
# draw = TRUE requests a plot; show_curves = TRUE includes the actual curves
draw = TRUE
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(3, 40)
plot_params$show_curves = TRUE
plot_params$title = NULL
# Fit the model
model = funclustVI(
  x, Y, K, true_cluster_assignments, init, nbasis, convergence_threshold,
  max_iterations, gamma_dist_config_matrix, verbose, draw, plot_params
)
This is an example which shows how to run simulations. Refer to the usage section above for detailed requirements.
library(funclustVI)
# Initialization
number_of_simulations = 1
save_path = NULL
# Data parameters
x = seq(from = 0, to = pi / 3, length.out = 100)
K = 3
curves_per_cluster = 50
true_cluster_assignments = rep(1:K, each = curves_per_cluster)
seeds = 1:30
# Pack into data parameter list
data_params = list()
data_params$x = x
data_params$K = K
data_params$curves_per_cluster = curves_per_cluster
data_params$true_cluster_assignments = true_cluster_assignments
data_params$seeds = seeds
data_params$generate_data = Case_7
# Model parameters
init = "km"
nbasis = 6
# 2 x K configuration matrix; both rows are built from K so they stay the
# right length if K is changed
gamma_dist_config_matrix = matrix(0, 2, K)
gamma_dist_config_matrix[1, ] = rep(78.125, K) * 5
gamma_dist_config_matrix[2, ] = rep(12.5, K) * 5
convergence_threshold = 1
max_iterations = 10
verbose = FALSE
draw = FALSE
# Pack into model parameter list, reusing the variables defined above so
# that each setting is specified in exactly one place
model_params = list()
model_params$model_func = get_funclustVI_cluster_assignments
model_params$init = init
model_params$nbasis = nbasis
model_params$gamma_dist_config_matrix = gamma_dist_config_matrix
model_params$convergence_threshold = convergence_threshold
model_params$max_iterations = max_iterations
model_params$save_path = save_path
model_params$verbose = verbose
model_params$draw = draw
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(1, 6)
plot_params$show_curves = FALSE
plot_params$title = NULL
model_params$plot_params = plot_params
# Evaluation parameter list: one named entry per evaluation metric
eval_func_list = list()
eval_func_list$mismatch = get_mismatches
eval_func_list$vmeasure = get_v_measure
# Run the simulations
simulate(
  data_params, model_params, eval_func_list, number_of_simulations, save_path
)
#> seed 1 : mismatch = 5 vmeasure = 0.8996935
#> Average mismatch = 5
#> Average vmeasure = 0.8996935
#> $result_matrix
#> [,1] [,2]
#> [1,] 5 0.8996935
#>
#> $simulation_length
#> Time difference of 9.116129 secs
#>
#> $eval_metric_avg_vector
#> [1] 5.0000000 0.8996935
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.