# Set the knitr chunk option `echo = TRUE` globally for this document.
knitr::opts_chunk$set(echo = TRUE)

library(FDclassifieR)
library(foreign)

# Prepare Data ----

# Load the seismic-bumps data, relabel the outcome, and split it 75/25 into
# training and testing rows.
set.seed(1024)  # fixes the random partition drawn below
Seismic <- read.arff('data/seismic-bumps.arff')
# Recode the outcome: "0" becomes "nonhaz", every other value becomes "haz".
Seismic$class <- as.factor(ifelse(Seismic$class == "0", "nonhaz", "haz"))

# createDataPartition() is a caret function; it is namespace-qualified so this
# step does not rely on some other package having attached caret.
inTraining0 <- caret::createDataPartition(
  Seismic$class,
  p = .75,
  list = FALSE
)
training <- Seismic[inTraining0, ]
testing  <- Seismic[-inTraining0, ]
# Take the held-out labels by column name instead of assuming that `class`
# is the last column of the data frame.
testingY <- to_label(testing$class)
table(Seismic$class)

# Data analysis ----

# Project the selected columns of Seismic onto their first two principal
# components; the resulting two-column matrix is the input for clustering.
tmp <- Seismic[, c(4:7, 9:13, 17:18)]
# cor = TRUE runs the PCA on the correlation matrix. It is spelled out in full
# because `T` is an ordinary variable that can be reassigned.
pca <- princomp(tmp, cor = TRUE)
pc.comp <- pca$scores
# Both score vectors are negated for convenience.
PrincipalComponent1 <- -1 * pc.comp[, 1]
PrincipalComponent2 <- -1 * pc.comp[, 2]
clustering.data <- cbind(PrincipalComponent1, PrincipalComponent2)

# K-Means Clustering ----

# Partition the two principal-component scores into 8 k-means clusters,
# plot the assignment, and compare the clusters with the class labels.
set.seed(100)
km <- kmeans(
  x = clustering.data,
  centers = 8,
  iter.max = 30,
  nstart = 30
)
# km
km$cluster
plot(
  x = PrincipalComponent1,
  y = PrincipalComponent2,
  col = km$cluster
)
points(km$centers, pch = 16)  # overlay the cluster centres

# Per-cluster means of columns 2 to 9 of `tmp`.
aggregate(tmp[, 2:9], by = list(km$cluster), FUN = mean)
# Cross-tabulate cluster membership against the class label.
table(km$cluster, Seismic$class)

# Spectral Clustering ----

# Spectral clustering of the same two principal-component scores into
# 8 clusters, followed by the same plot and summaries as for k-means.
library(kknn)
cl <- specClust(
  clustering.data,
  centers = 8,
  nn = 50,
  iter.max = 100
)
# cl
plot(
  x = PrincipalComponent1,
  y = PrincipalComponent2,
  col = cl$cluster
)

# Cross-tabulate cluster membership against the class label.
table(cl$cluster, Seismic$class)

# Per-cluster means of columns 2 to 9 of `tmp`.
aggregate(tmp[, 2:9], by = list(cl$cluster), FUN = mean)

# Hierarchical Clustering ----

# Average-linkage hierarchical clustering of the principal-component scores,
# with the tree cut into 8 groups.
d_yeast <- dist(clustering.data)  # pairwise distances between observations
hclusters <- hclust(d = d_yeast, method = "average")
clusterCut <- cutree(tree = hclusters, k = 8)
# clusterCut
# Cross-tabulate group membership against the class label.
table(clusterCut, Seismic$class)
# Per-group means of every column of `tmp`.
aggregate(tmp, by = list(clusterCut), FUN = mean)

plot(
  x = PrincipalComponent1,
  y = PrincipalComponent2,
  col = clusterCut
)

# Train models ----

# Train a collection of classifiers on the training rows, predict on the
# held-out rows, build the fde object from those predictions, and save the
# results.
model_list <- c(
  'nnet', 'rda', 'svmLinear', 'svmRadial', 'pls', 'knn', 'earth', 'avNNet',
  'mlp', 'nb', 'rf', 'rpart', 'ctree', 'C5.0', 'gbm', 'bayesglm', 'glm',
  'glmnet', 'simpls'
)

t1 <- mtrainer(model_list, dataInfo = 'Seismic')
t1 <- train(t1, class ~ ., training)
plot(t1)
# Summarise the trainer that was just fitted. This line previously read
# `summary(s1)`, but no object named `s1` is created anywhere in this script.
summary(t1)
t1 <- t1 %>%
  addmodel.mtrainer(
    c('ctree', 'C5.0', 'gbm', 'svmLinear', 'svmRadial', 'pls', 'earth',
      'avNNet')
  ) %>%
  train(class ~ ., training)
t1 <- predict(t1, newdata = testing)
# auclist <- apply(t1$predictions, 2, auc.rank, testingY)

fde1 <- fde(t1$predictions, testingY)
# fde1 <- predict_performance(fde1, auclist, attr(testingY, 'rho'))
plot_cor(fde1, legend_flag = TRUE)
# NOTE(review): fde1 is rebuilt from the same inputs before the score plot;
# this looks redundant while the predict_performance() step above is
# commented out. Confirm before removing.
fde1 <- fde(t1$predictions, testingY)
plot_single(fde1, 'score')
store.mtrainer(t1, 'seismic_m8_pre.RData')
# These two files are written with saveRDS(), so they must be read back with
# readRDS() (not load()) despite the .RData extension.
saveRDS(testingY, 'seismic_m8_y.RData')
saveRDS(t1, 'seismic_all.RData')
y <- testingY
# Run pcr() on the 10th column of the prediction matrix against the labels.
p1 <- pcr(t1$predictions[, 10], y, sample_size = 100, sample_n = 500)
check.pcr(p1)
plot(p1)


# sungcheolkim78/FiDEL documentation built on Nov. 13, 2024, 7:58 a.m.