README.md

funclustVI

The following package was created by John Jewell as part of an Undergraduate Dissertation at Western University under the supervision of Professor Camila de Souza. The package clusters functional data using variational inference. More details regarding the functionality of the package are available in the Usage and Examples sections.

Installation

You can install the released version of funclustVI from CRAN with:

install.packages("funclustVI")

And the development version from GitHub with:

 install.packages("devtools")
devtools::install_github("jewelltaylor/funclustVI")

Usage

The funclustVI package offers two main functions: funclustVI and simulate.

funclustVI

The funclustVI function clusters functional data using a novel variational inference based approach and returns a fitted model.

Arguments

Return Value

The funclustVI function returns a list with the following entries:

simulate

The simulate function generates data, fits a model and evaluates the model's predictions over a set number of iterations, all using custom data generation, modelling and evaluation parameters specified by the user.

Arguments

Return Value

The simulate function returns a list with the following entries:

Examples

This example shows how to use the package to generate cluster assignments from functional data. Refer to the Usage section above for detailed requirements.

# Load the funclustVI package
library(funclustVI)

# Data parameters: 100 evaluation points on [0, pi / 3] and 10 curves for
# each of the K = 3 clusters. Case_7 generates the curves from data_params.
x = seq(from = 0, to = pi / 3, length.out = 100)
curves_per_cluster = 10
data_params = list()
data_params$x = x
data_params$curves_per_cluster = curves_per_cluster
Y = Case_7(data_params)
K = 3
true_cluster_assignments = rep(1:K, each = curves_per_cluster)

# Model parameters
init = "km"
nbasis = 6
convergence_threshold = 1
max_iterations = 10
# 2 x K configuration matrix for the gamma distribution; every column gets
# the same pair of values, so the rows are built with rep() to follow K.
gamma_dist_config_matrix = matrix(0, 2, K)
gamma_dist_config_matrix[1, ] = rep(78.125, K) * 100
gamma_dist_config_matrix[2, ] = rep(12.5, K) * 100
verbose = FALSE
draw = FALSE
# Plot parameters. Assigning NULL leaves xlim and title unset in the list.
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(1, 6)
plot_params$show_curves = FALSE
plot_params$title = NULL

# Fit the model
# NOTE(review): the call is spelled `funcslustVI` (with an extra "s") while the
# text above refers to `funclustVI` -- confirm which name the package exports.
model = funcslustVI(
  x, Y, K, true_cluster_assignments, init, nbasis,
  convergence_threshold, max_iterations, gamma_dist_config_matrix,
  verbose, draw, plot_params
)

# Get the cluster assignments
cluster_assignments = model$cluster_assignments

print(cluster_assignments)
#>  [1] 1 1 1 2 1 1 1 1 1 1 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2

This example shows how to use the funclustVI package to cluster real data included in the package. The data corresponds to the daily temperatures in Vancouver and Toronto during the month of July over the 10-year period 1994 to 2003. Since draw = TRUE, a plot is generated showing the true function vs the estimated function for each cluster. Since show_curves is TRUE, the plot includes the actual curves in differing colors. Refer to the Usage section above for detailed requirements.

# Load the package so this example can be run on its own
library(funclustVI)

# Data parameters: one observation per day of July (31 points) for the
# temperature data shipped with the package, clustered into K = 2 groups.
x = 1:31
Y = dataset
K = 2
# NOTE(review): 9 curves per cluster, although the text above mentions a
# 10-year period -- confirm against the contents of `dataset`.
curves_per_cluster = 9
true_cluster_assignments = rep(1:K, each = curves_per_cluster)

# Model parameters
init = "km"
nbasis = 10
convergence_threshold = 1
max_iterations = 10
gamma_dist_config_matrix = NULL
verbose = FALSE
# draw = TRUE requests the plot; show_curves = TRUE overlays the actual curves
draw = TRUE
# Plot parameters. Assigning NULL leaves xlim and title unset in the list.
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(3, 40)
plot_params$show_curves = TRUE
plot_params$title = NULL

# Fit the model
# NOTE(review): the call is spelled `funcslustVI` (with an extra "s") while the
# text above refers to `funclustVI` -- confirm which name the package exports.
model = funcslustVI(
  x, Y, K, true_cluster_assignments, init, nbasis,
  convergence_threshold, max_iterations, gamma_dist_config_matrix,
  verbose, draw, plot_params
)

This example shows how to run simulations. Refer to the Usage section above for detailed requirements.

# Load the funclustVI package
library(funclustVI)

# Initialization
number_of_simulations = 1
save_path = NULL

# Data parameters: 100 evaluation points on [0, pi / 3] and 50 curves for
# each of the K = 3 clusters
x = seq(from = 0, to = pi / 3, length.out = 100)
K = 3
curves_per_cluster = 50
true_cluster_assignments = rep(1:K, each = curves_per_cluster)
seeds = 1:30

# Pack into data parameter list
data_params = list()
data_params$x = x
data_params$K = K
data_params$curves_per_cluster = curves_per_cluster
data_params$true_cluster_assignments = true_cluster_assignments
data_params$seeds = seeds
data_params$generate_data = Case_7

# Model parameters
init = "km"
nbasis = 6
# 2 x K configuration matrix for the gamma distribution; every column gets
# the same pair of values, so the rows are built with rep() to follow K.
gamma_dist_config_matrix = matrix(0, 2, K)
gamma_dist_config_matrix[1, ] = rep(78.125, K) * 5
gamma_dist_config_matrix[2, ] = rep(12.5, K) * 5
convergence_threshold = 1
max_iterations = 10
verbose = FALSE
draw = FALSE

# Pack into model parameter list. The values defined above are reused here
# so that each setting only has to be changed in one place.
model_params = list()
model_params$model_func = get_funclustVI_cluster_assignments
model_params$init = init
model_params$nbasis = nbasis
model_params$gamma_dist_config_matrix = gamma_dist_config_matrix
model_params$convergence_threshold = convergence_threshold
model_params$max_iterations = max_iterations
model_params$save_path = save_path
model_params$verbose = verbose
model_params$draw = draw
# Plot parameters. Assigning NULL leaves xlim and title unset in the list.
plot_params = list()
plot_params$xlim = NULL
plot_params$ylim = c(1, 6)
plot_params$show_curves = FALSE
plot_params$title = NULL
model_params$plot_params = plot_params

# Evaluation parameter list: one named entry per evaluation function
eval_func_list = list()
eval_func_list$mismatch = get_mismatches
eval_func_list$vmeasure = get_v_measure

# Run the simulations
simulate(data_params, model_params, eval_func_list, number_of_simulations, save_path)
#> seed  1 : mismatch  =  5  vmeasure  =  0.8996935  
#> Average  mismatch  =  5 
#> Average  vmeasure  =  0.8996935
#> $result_matrix
#>      [,1]      [,2]
#> [1,]    5 0.8996935
#> 
#> $simulation_length
#> Time difference of 9.116129 secs
#> 
#> $eval_metric_avg_vector
#> [1] 5.0000000 0.8996935


jewelltaylor/funclustVI documentation built on June 1, 2022, 12:30 p.m.