# Simulations/Simulation_TimeComparison_cluster.R

# Initialization ----------------------------------------------------------

# Loading the libraries
library(fda.usc)
library(roahd)
library(energy)
library(entropy)
library(partykit)
library(igraph)
library(NetworkDistance)
library(microbenchmark)

# Loading the dataset
# The path is relative, so the script must be run from the directory that
# contains the .RData file.
# NOTE(review): `data` (used to build every dataset below) and `n.bas` (passed
# to etree() as `nb`) are never assigned in this script, so they presumably
# come from this .RData file (or, for `n.bas`, from a sourced file) -- confirm.
load("Regression_Sim_Dataset.RData")



# Datasets ----------------------------------------------------------------

# Min-max rescaling of a numeric vector: (x - min(x)) / (max(x) - min(x)).
#
# x: numeric vector.
# Returns a vector of the same length as `x`; for inputs with a non-zero
# range the smallest value maps to 0 and the largest to 1.
#
# Edge cases:
#  - empty input returns numeric(0) (avoids the min()/max() warnings);
#  - a constant input has zero range, which would give 0 / 0 = NaN, so it is
#    mapped to all zeros instead;
#  - NA / NaN in `x` propagate to every element, as min() and max() do.
norm01 <- function(x){
  if (length(x) == 0L) {
    return(numeric(0))
  }
  rng <- range(x)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # Zero range: x - min(x) is already all zeros, so skip the division
    return(x - rng[1])
  }
  (x - rng[1]) / span
}

### DATASET 1 (20 observations) ###

# Response: first 20 entries of Y from the first element of `data`
# (extracted directly; there is no need to map over every element of `data`
# only to keep the first result)
resp1 <- data[[1]]$Y[1:20]

# Building the numerical variable: V1 of the second element of `data` is
# passed through fda.usc::min.basis() and converted to a B-spline fd object;
# the transposed coefficient matrix is stored back into `foo` so that it can
# be indexed as [observation, coefficient] below.
# NOTE(review): this uses data[[2]], whereas the response and the first
# covariate use data[[1]] -- confirm this is intended.
foo <- fda.usc::min.basis(data[[2]]$V1, numbasis = 15)
fd3 <- fda.usc::fdata2fd(foo$fdata.est,
                         type.basis = "bspline",
                         nbasis = foo$numbasis.opt)
foo$coef <- t(fd3$coefs)

# Building the graph variable: one random graph on 100 vertices per
# observation, with the rescaled response as connection probability
conn.prob <- norm01(resp1)
# Probabilities below 0.05 are shifted up by 0.1
low.prob <- which(conn.prob < 0.05)
conn.prob[low.prob] <- conn.prob[low.prob] + 0.1
# Fixed seed so that the generated graphs are reproducible
set.seed(12345)
graph.list <- lapply(conn.prob, function(p){sample_gnp(100, p)})

# Covariates for the full mixed model: V1 of data[[1]], the graphs, and
# column 15 of the coefficient matrix (all restricted to the first 20 rows)
cov.list1 <- list(data[[1]]$V1[1:20], graph.list, foo$coef[1:20, 15])


### DATASET 2 (100 observations) ###

# Response: first 100 entries of Y from the first element of `data`
# (extracted directly; there is no need to map over every element of `data`
# only to keep the first result)
resp2 <- data[[1]]$Y[1:100]

# Building the numerical variable: V1 of the second element of `data` is
# passed through fda.usc::min.basis() and converted to a B-spline fd object;
# the transposed coefficient matrix is stored back into `foo` so that it can
# be indexed as [observation, coefficient] below.
# NOTE(review): this uses data[[2]], whereas the response and the first
# covariate use data[[1]] -- confirm this is intended.
foo <- fda.usc::min.basis(data[[2]]$V1, numbasis = 15)
fd3 <- fda.usc::fdata2fd(foo$fdata.est,
                         type.basis = "bspline",
                         nbasis = foo$numbasis.opt)
foo$coef <- t(fd3$coefs)

# Building the graph variable: one random graph on 100 vertices per
# observation, with the rescaled response as connection probability
conn.prob <- norm01(resp2)
# Probabilities below 0.05 are shifted up by 0.1
low.prob <- which(conn.prob < 0.05)
conn.prob[low.prob] <- conn.prob[low.prob] + 0.1
# Fixed seed so that the generated graphs are reproducible
set.seed(12345)
graph.list <- lapply(conn.prob, function(p){sample_gnp(100, p)})

# Covariates for the full mixed model: V1 of data[[1]], the graphs, and
# column 15 of the coefficient matrix (all restricted to the first 100 rows)
cov.list2 <- list(data[[1]]$V1[1:100], graph.list, foo$coef[1:100, 15])


### DATASET 3 (200 observations) ###

# Response: the whole Y from the first element of `data`
# (extracted directly; there is no need to map over every element of `data`
# only to keep the first result)
resp3 <- data[[1]]$Y

# Building the numerical variable: V1 of the second element of `data` is
# passed through fda.usc::min.basis() and converted to a B-spline fd object;
# the transposed coefficient matrix is stored back into `foo` so that it can
# be indexed as [observation, coefficient] below.
# NOTE(review): this uses data[[2]], whereas the response and the first
# covariate use data[[1]] -- confirm this is intended.
foo <- fda.usc::min.basis(data[[2]]$V1, numbasis = 15)
fd3 <- fda.usc::fdata2fd(foo$fdata.est,
                         type.basis = "bspline",
                         nbasis = foo$numbasis.opt)
foo$coef <- t(fd3$coefs)

# Building the graph variable: one random graph on 100 vertices per
# observation, with the rescaled response as connection probability
conn.prob <- norm01(resp3)
# Probabilities below 0.05 are shifted up by 0.1
low.prob <- which(conn.prob < 0.05)
conn.prob[low.prob] <- conn.prob[low.prob] + 0.1
# Fixed seed so that the generated graphs are reproducible
set.seed(12345)
graph.list <- lapply(conn.prob, function(p){sample_gnp(100, p)})

# Covariates for the full mixed model: V1 of data[[1]], the graphs, and
# column 15 of the coefficient matrix (all rows)
cov.list3 <- list(data[[1]]$V1, graph.list, foo$coef[, 15])



# functions ---------------------------------------------------------------
## distances & expansions inside
source("functions.R")

# Time etree() on the three datasets; every expression is evaluated 10 times
# (times = 10L) and the settings are the same for each dataset.
# NOTE(review): etree() is not defined in this script, so it presumably comes
# from the file sourced above; `n.bas` is not assigned here either -- confirm
# where it is defined.

# Small dataset (20 observations)
mb_cl_fun_1 <- microbenchmark(
  etree(response = resp1,
        covariates = cov.list1,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Medium dataset (100 observations)
mb_cl_fun_2 <- microbenchmark(
  etree(response = resp2,
        covariates = cov.list2,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Large dataset (200 observations)
mb_cl_fun_3 <- microbenchmark(
  etree(response = resp3,
        covariates = cov.list3,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)


# fun_test1 ---------------------------------------------------------------
## distances inside, expansions outside
source("fun_test1.R")

# Time etree() on the three datasets; every expression is evaluated 10 times
# (times = 10L) and the settings are the same for each dataset.
# NOTE(review): sourcing the file above presumably replaces the previously
# loaded etree() with this variant; `n.bas` is not assigned in this script --
# confirm where it is defined.

# Small dataset (20 observations)
mb_cl_ft1_1 <- microbenchmark(
  etree(response = resp1,
        covariates = cov.list1,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Medium dataset (100 observations)
mb_cl_ft1_2 <- microbenchmark(
  etree(response = resp2,
        covariates = cov.list2,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Large dataset (200 observations)
mb_cl_ft1_3 <- microbenchmark(
  etree(response = resp3,
        covariates = cov.list3,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)


# fun_test2 ---------------------------------------------------------------
## variable selection distances & expansions outside
source("fun_test2.R")

# Time etree() on the three datasets; every expression is evaluated 10 times
# (times = 10L) and the settings are the same for each dataset.
# NOTE(review): sourcing the file above presumably replaces the previously
# loaded etree() with this variant; `n.bas` is not assigned in this script --
# confirm where it is defined.

# Small dataset (20 observations)
mb_cl_ft2_1 <- microbenchmark(
  etree(response = resp1,
        covariates = cov.list1,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Medium dataset (100 observations)
mb_cl_ft2_2 <- microbenchmark(
  etree(response = resp2,
        covariates = cov.list2,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Large dataset (200 observations)
# minbucket was 10 here, unlike the other runs of this section and unlike the
# `fun` and `ft1` runs on the same dataset (all 5). It is set to 5 so that the
# timings are compared under identical settings.
mb_cl_ft2_3 <- microbenchmark(
  etree(response = resp3,
        covariates = cov.list3,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)


# fun_test3 ---------------------------------------------------------------
## (variable selection + clustering) distances & expansions outside
source("fun_test3.R")

# Time etree() on the three datasets; every expression is evaluated 10 times
# (times = 10L) and the settings are the same for each dataset.
# NOTE(review): sourcing the file above presumably replaces the previously
# loaded etree() with this variant; `n.bas` is not assigned in this script --
# confirm where it is defined.

# Small dataset (20 observations)
mb_cl_ft3_1 <- microbenchmark(
  etree(response = resp1,
        covariates = cov.list1,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Medium dataset (100 observations)
mb_cl_ft3_2 <- microbenchmark(
  etree(response = resp2,
        covariates = cov.list2,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Large dataset (200 observations)
# minbucket was 10 here, unlike the other runs of this section and unlike the
# `fun` and `ft1` runs on the same dataset (all 5). It is set to 5 so that the
# timings are compared under identical settings.
mb_cl_ft3_3 <- microbenchmark(
  etree(response = resp3,
        covariates = cov.list3,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)


# fun_test4 ---------------------------------------------------------------
## (variable selection + clustering + expansions) distances & expansions outside
source("fun_test4.R")

# Time etree() on the three datasets; every expression is evaluated 10 times
# (times = 10L) and the settings are the same for each dataset.
# NOTE(review): sourcing the file above presumably replaces the previously
# loaded etree() with this variant; `n.bas` is not assigned in this script --
# confirm where it is defined.

# Small dataset (20 observations)
mb_cl_ft4_1 <- microbenchmark(
  etree(response = resp1,
        covariates = cov.list1,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Medium dataset (100 observations)
mb_cl_ft4_2 <- microbenchmark(
  etree(response = resp2,
        covariates = cov.list2,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)

# Large dataset (200 observations)
# minbucket was 10 here, unlike the other runs of this section and unlike the
# `fun` and `ft1` runs on the same dataset (all 5). It is set to 5 so that the
# timings are compared under identical settings.
mb_cl_ft4_3 <- microbenchmark(
  etree(response = resp3,
        covariates = cov.list3,
        case.weights = NULL,
        minbucket = 5,
        alpha = 0.05,
        R = 1000,
        split.type = "cluster",
        coef.split.type = "test",
        nb = n.bas),
  times = 10L
)



# Comparison --------------------------------------------------------------

# Elapsed times: one row per benchmark (labelled <implementation>_<dataset>),
# one column per repetition
et_cl <- rbind(fun_1 = mb_cl_fun_1$time,
               fun_2 = mb_cl_fun_2$time,
               fun_3 = mb_cl_fun_3$time,
               ft1_1 = mb_cl_ft1_1$time,
               ft1_2 = mb_cl_ft1_2$time,
               ft1_3 = mb_cl_ft1_3$time,
               ft2_1 = mb_cl_ft2_1$time,
               ft2_2 = mb_cl_ft2_2$time,
               ft2_3 = mb_cl_ft2_3$time,
               ft3_1 = mb_cl_ft3_1$time,
               ft3_2 = mb_cl_ft3_2$time,
               ft3_3 = mb_cl_ft3_3$time,
               ft4_1 = mb_cl_ft4_1$time,
               ft4_2 = mb_cl_ft4_2$time,
               ft4_3 = mb_cl_ft4_3$time)

# microbenchmark stores times in nanoseconds; convert to seconds
et_cl <- et_cl / 1e9

# summary() on a matrix works column by column, so transpose first to get one
# summary per benchmark rather than one per repetition index. print() is
# explicit so the table is also shown when the script is run through source().
print(summary(t(et_cl)))

# Store the raw benchmark objects for later analysis
save(mb_cl_fun_1,
     mb_cl_fun_2,
     mb_cl_fun_3,
     mb_cl_ft1_1,
     mb_cl_ft1_2,
     mb_cl_ft1_3,
     mb_cl_ft2_1,
     mb_cl_ft2_2,
     mb_cl_ft2_3,
     mb_cl_ft3_1,
     mb_cl_ft3_2,
     mb_cl_ft3_3,
     mb_cl_ft4_1,
     mb_cl_ft4_2,
     mb_cl_ft4_3,
     file = 'TimeComparison_cluster.RData')


### Legend ###
# cl: cluster
# fun: distances & expansions inside
# ft1: distances inside, expansions outside
# ft2: variable selection distances & expansions outside
# ft3: (variable selection + clustering) distances & expansions outside
# ft4: (variable selection + clustering + expansions) distances & expansions outside
# _1: small dataset (20 observations)
# _2: medium dataset (100 observations)
# _3: large dataset (200 observations)
# tulliapadellini/energytree documentation built on May 14, 2020, 8:06 p.m.