# Set the knitr chunk option `echo` to TRUE for all chunks, so chunk source
# code is shown in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

Install package

# Install vineclust from the GitHub repository "oezgesahin/vineclust".
# Requires the remotes package and network access.
remotes::install_github("oezgesahin/vineclust")

Get started

library(vineclust)

# Data from the UCI Machine Learning Repository. The file is read without a
# header row, so read.csv() names the columns V1, V2, ...
# The download uses HTTPS instead of plain HTTP so the data cannot be
# altered in transit.
wdbc_url <- paste0(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/",
  "breast-cancer-wisconsin/wdbc.data"
)
data_wisc <- read.csv(wdbc_url, header = FALSE)

# R-vine copula based mixture model with two components, fitted on columns
# 15, 27, 29 and 30 of the data.
fit <- vcmm(data = data_wisc[, c(15, 27, 29, 30)], total_comp = 2)

print(fit)

summary(fit)
# Build one VineCopula::RVineMatrix per mixture component (components 1 and 2)
# from the matching third-dimension slice of the arrays in fit$output.
RVMs_fitted <- lapply(1:2, function(k) {
  out <- fit$output
  VineCopula::RVineMatrix(
    Matrix = out$vine_structure[, , k],
    family = out$bicop_familyset[, , k],
    par = out$bicop_param[, , k],
    par2 = out$bicop_param2[, , k]
  )
})

# Evaluate dvcmm() at one four-dimensional point, passing the fitted margins,
# marginal parameters, the vine matrices built above and the mixture
# probabilities. NOTE(review): presumably this is the mixture density at that
# point -- confirm in ?dvcmm.
dvcmm(
  c(2.747, 0.1467, 0.13, 0.05334),
  fit$output$margin,
  fit$output$marginal_param,
  RVMs_fitted,
  fit$output$mixture_prob
)
# The same four columns of data_wisc are used by every fit below.
wisc_features <- data_wisc[, c(15, 27, 29, 30)]

# C-vine copula based mixture model
fit_cvine <- vcmm(data = wisc_features, total_comp = 2, is_cvine = 1)

# Confusion matrix w.r.t. true classification (column V2 of the raw data)
table(fit_cvine$cluster, data_wisc$V2)

# Fit only bivariate Clayton copula for pairs of variables in both components
fit_clayton <- vcmm(data = wisc_features, total_comp = 2, bicop = 3)

# Fix vine tree structures of both components
fixed_structure <- matrix(
  c(1, 2, 3, 4,
    0, 2, 4, 3,
    0, 0, 4, 3,
    0, 0, 0, 3),
  4, 4
)
fit_fix_vinestr <- vcmm(
  data = data_wisc[, c(15, 27, 29, 30)],
  total_comp = 2,
  vinestr = fixed_structure
)

# Shorten the ECM iterations by passing threshold = 0.001 instead of the
# default threshold.
# NOTE(review): the original comment called 0.001 "smaller" than the default;
# stopping earlier suggests it is actually larger -- confirm in ?vcmm.
fit_sthr <- vcmm(data = wisc_features, total_comp = 2, threshold = 0.001)

# Use a different initial partition approach from k-means
fit_best_init <- vcmm(data = wisc_features, total_comp = 2, methods = "gmm")

Simulation

# Simulation setup given in Section 5.2 of the paper at https://arxiv.org/pdf/2102.03257.pdf
dims <- 3
# NOTE(review): presumably the number of observations drawn per component --
# confirm in ?rvcmm.
obs <- c(500, 500)
n_comp <- length(obs)

# Both components use the same vine structure matrix.
vine_structure <- matrix(c(1, 3, 2, 0, 3, 2, 0, 0, 2), dims, dims)

# Second copula parameters are all zero. A plain zero matrix replaces the
# earlier sample(0, ...) construction, which produced the same zeros but
# drew from the random number generator for no reason.
zero_par2 <- matrix(0, dims, dims)

RVMs <- list(
  VineCopula::RVineMatrix(
    Matrix = vine_structure,
    family = matrix(c(0, 3, 4, 0, 0, 14, 0, 0, 0), dims, dims),
    par = matrix(c(0, 0.8571429, 2.5, 0, 0, 5, 0, 0, 0), dims, dims),
    par2 = zero_par2
  ),
  VineCopula::RVineMatrix(
    Matrix = vine_structure,
    family = matrix(c(0, 6, 5, 0, 0, 13, 0, 0, 0), dims, dims),
    par = matrix(c(0, 1.443813, 11.43621, 0, 0, 2, 0, 0, 0), dims, dims),
    par2 = zero_par2
  )
)

# Marginal families as a dims x n_comp character matrix.
# NOTE(review): presumably rows are variables and columns are components --
# confirm in ?rvcmm.
margin <- matrix(
  c("Normal", "Gamma", "Lognormal", "Lognormal", "Normal", "Gamma"),
  dims, n_comp
)

# margin_pars[, j, k] holds two parameter values for the margin indexed by
# (j, k) -- presumably variable j in component k.
margin_pars <- array(0, dim = c(2, dims, n_comp))
margin_pars[, 1, 1] <- c(1, 2)
margin_pars[, 1, 2] <- c(1.5, 0.4)
margin_pars[, 2, 1] <- c(1, 0.2)
margin_pars[, 2, 2] <- c(18, 5)
margin_pars[, 3, 1] <- c(0.8, 0.8)
margin_pars[, 3, 2] <- c(1, 0.2)

# Simulate data from the mixture model specified above.
x_data <- rvcmm(dims, obs, margin, margin_pars, RVMs)


pderdeyn/Stats230VineCopulaClustering documentation built on March 21, 2022, 12:56 a.m.