# R/simulation1.R

#source("graph_n_functional.R")

# Use library() for hard dependencies: it errors immediately when a package
# is missing, whereas require() merely returns FALSE and lets the script
# continue until it fails later with a confusing "could not find function".
library(igraph)
library(reshape)
library(plyr)
library(Metrics)
library(pracma)
library(rpart)
library(party)
library(partykit)


set.seed(1)

n_simulations <- 1

# One RMSE slot per simulation for each of the three competing models:
# the energy tree, CART (rpart) and the conditional inference tree.
rmse_v <- list(
  etree = rep(NA, n_simulations),
  dtree = rep(NA, n_simulations),
  ctree = rep(NA, n_simulations)
)

# Per-simulation containers: the generated graphs and their responses.
n.graphs <- 200
all.graphs <- list()
Y.s <- rep(NA, n.graphs)
for (j in seq_len(n_simulations)) {
  print(paste("SIMULATION ", j))

  # Generate n.graphs Erdos-Renyi G(n = 200, p = 0.03) random graphs and
  # compute the scalar response for each one.
  for (i in seq_len(n.graphs)) {
    g <- sample_gnp(n = 200, p = 0.03, directed = FALSE, loops = FALSE)
    all.graphs[[i]] <- g

    # Response: sum over the coreness distribution of
    # (coreness value) x (frequency), which is exactly the sum of the
    # per-vertex coreness scores. Computing it directly fixes the previous
    # misalignment bug, where the count() frequency table was dotted with
    # seq(0, k - 1): that only matched the true coreness values when every
    # value in 0..k-1 actually occurred in the graph.
    Y.s[i] <- sum(coreness(g))
  }

  # Shuffle responses and graphs with ONE shared permutation so the pairing
  # (graph_i, Y_i) is preserved. This draws the same permutation as the old
  # "set.seed(123) before each sample()" trick, but makes the shared
  # ordering explicit instead of relying on resetting the RNG state.
  set.seed(123)
  perm <- sample(n.graphs)
  Y.s <- Y.s[perm]
  all.graphs <- all.graphs[perm]

  data <- all.graphs

  ###### split the data into train and test sets
  # n = length(Y.s)
  # rnd.ind = sample(x = seq(n), size = 0.8*n)
  # train = test = data
  # n.var <- length(data)

  # split the data
  # for(i in 1:length(data)) {
  #   if(class(train[[i]])=="numeric" | class(train[[i]])=="double" | class(train[[i]])=="list") {
  #     train[[i]] = train[[i]][c(rnd.ind)]
  #     test[[i]] = test[[i]][-c(rnd.ind)]
  #   }
  #   else if(class(train[[i]])=="data.frame") {
  #     train[[i]] = train[[i]][c(rnd.ind),]
  #     test[[i]] = test[[i]][-c(rnd.ind),]
  #   }
  # }

  # Fit the energy tree on the full set of graphs (mytree is expected to
  # come from graph_n_functional.R, sourced at the top of the file).
  myS <- mytree(response = Y.s,
                covariates = all.graphs,
                case.weights = NULL,
                minbucket = 2,
                alpha = 0.05)
  plot(myS)
}


  ###### PREDICTION
  # NOTE(review): this whole section is structurally broken and cannot run
  # as-is. It sits OUTSIDE the simulation loop (the loop is closed right
  # after plot(myS) above), yet it indexes rmse_v[[...]][j], and the closing
  # brace at the end of this section has no matching opener -- the block
  # almost certainly belongs INSIDE the loop. In addition, `test`, `rnd.ind`
  # and `m.data` are never defined here: the train/test split above is
  # commented out and `m.data` (a tabular view of the graph features,
  # presumably built elsewhere -- TODO confirm) does not appear anywhere in
  # this file.
  Y.test <- test[[which(names(test) == "Y")]]
  test <- test[which(names(test) != "Y")]
  my.pred <- my.predict(model = myS, newdata = test)

  ###### GET THE TABULAR DATA TO BE USED BY OTHER ALGORITHMS
  # Rebuild train/test from the tabular data so the baseline models see the
  # same split; drop the response column from the test set before predicting.
  train <- m.data[c(rnd.ind),]
  test <- m.data[-c(rnd.ind),]
  test <- test[which(names(test) != "Y")]

  ###### COMPARISON WITH DECISION TREES (CART)
  dtree.model <- rpart(Y~., data=train)
  dtree.pred <- predict(object = dtree.model, newdata = test)

  ###### COMPARISON WITH CONDITIONAL TREES
  ctree.model <- ctree(formula = Y~., data = train)
  ctree.pred <- predict(object = ctree.model, newdata = test)

  # Store the test-set RMSE of each model for simulation j.
  rmse_v[["etree"]][j] <-  rmse(Y.test, my.pred)
  rmse_v[["dtree"]][j] <- rmse(Y.test, dtree.pred)
  rmse_v[["ctree"]][j] <- rmse(Y.test, ctree.pred)
# NOTE(review): unmatched closing brace -- see the structural note above.
}

###### COMPARISON
# Report the mean and the standard deviation of the per-simulation RMSE for
# each competing model, in a fixed order, rounded to 4 decimals.
model_labels <- c(
  etree = "Energy Tree",
  dtree = "Decision Tree",
  ctree = "Conditional Inference Tree"
)

for (key in names(model_labels)) {
  print(paste(paste0("MRMSE ", model_labels[[key]], ":"),
              round(mean(rmse_v[[key]]), 4)))
}

for (key in names(model_labels)) {
  print(paste(paste0("SD RMSE ", model_labels[[key]], ":"),
              round(sd(rmse_v[[key]]), 4)))
}
# (scraped site footer, kept as a comment so the file parses)
# tulliapadellini/energytree documentation built on May 14, 2020, 8:06 p.m.