# Setup chunk: echo code in the rendered document and attach the
# FDclassifieR package (also brings in caret helpers used below).
knitr::opts_chunk$set(echo = TRUE)
library(FDclassifieR)
# Load the UCI yeast protein-localization data and build a stratified
# 75/25 train/test split.
set.seed(1024)
#yeast <- read_table(url("http://archive.ics.uci.edu/ml/machine-learning-databases/yeast/yeast.data"))
tmp <- read.table('data/yeast.csv')
names(tmp) <- c("SequenceName", "mcg", "gvh", "alm", "mit", "erl",
                "pox", "vac", "nuc", "LocalizationSite")
#head(tmp)
#table(tmp$LocalizationSite)
# Keep only the two largest classes ('CYT', 'NUC') and drop the
# SequenceName column (columns 2:10 = 8 features + the class label).
Yeast <- tmp[tmp$LocalizationSite %in% c('CYT', 'NUC'), 2:10]
names(Yeast)[ncol(Yeast)] <- 'Site'
Yeast$Site <- factor(Yeast$Site, c('CYT', 'NUC'))
# caret::createDataPartition keeps the class proportions in each split.
inTraining0 <- createDataPartition(Yeast$Site, p = .75, list = FALSE)
training <- Yeast[inTraining0, ]
testing  <- Yeast[-inTraining0, ]
# Numeric labels for the held-out set (to_label is from FDclassifieR).
testingY <- to_label(Yeast[-inTraining0, ncol(Yeast)])
# Class balance of the two retained localization sites (CYT vs NUC).
table(Yeast$Site)
# Principal components analysis of the eight numeric features, using the
# correlation matrix so each variable is standardized first.
# Fix: cor = TRUE instead of cor = T (T is a reassignable binding, not a
# reserved word; always spell out TRUE/FALSE).
pca <- princomp(tmp[, 2:9], cor = TRUE)
pc.comp <- pca$scores
# The sign of a principal component is arbitrary; negate both for a
# more convenient orientation in the plots below.
PrincipalComponent1 <- -1 * pc.comp[, 1]
PrincipalComponent2 <- -1 * pc.comp[, 2]
# Two-column matrix of PC scores used as input by all three clusterers.
clustering.data <- cbind(PrincipalComponent1, PrincipalComponent2)
# k-means clustering (k = 8, matching the original number of yeast
# localization classes) on the first two principal components.
set.seed(100)
km <- kmeans(clustering.data, 8, iter.max = 30, nstart = 30)
#km
km$cluster
plot(PrincipalComponent1, PrincipalComponent2, col = km$cluster)
points(km$centers, pch = 16)
# Per-cluster feature means, then cluster vs. true-site cross-tab.
aggregate(tmp[, 2:9], by = list(km$cluster), mean)
table(km$cluster, tmp$LocalizationSite)
# Spectral clustering (kknn::specClust) on the same PC scores, again
# with 8 centers, compared against the true localization sites.
library(kknn)
cl <- specClust(clustering.data, centers = 8, nn = 50, iter.max = 100)
#cl
plot(PrincipalComponent1, PrincipalComponent2, col = cl$cluster)
table(cl$cluster, tmp$LocalizationSite)
aggregate(tmp[, 2:9], by = list(cl$cluster), mean)
# Average-linkage hierarchical clustering on the PC scores, with the
# dendrogram cut into 8 groups for comparison with the other methods.
d_yeast <- dist(clustering.data)
hclusters <- hclust(d_yeast, method = "average")
clusterCut <- cutree(hclusters, 8)
#clusterCut
table(clusterCut, tmp$LocalizationSite)
aggregate(tmp[, 2:9], by = list(clusterCut), mean)
plot(PrincipalComponent1, PrincipalComponent2, col = clusterCut)
# Base classifiers (caret method names) for the multi-model trainer.
model_list <- c('nnet', 'rda', 'svmLinear', 'svmRadial', 'pls', 'knn',
                'earth', 'avNNet', 'mlp', 'nb', 'rf', 'rpart', 'ctree',
                'C5.0', 'gbm', 'bayesglm', 'glm', 'glmnet', 'simpls')
t1 <- mtrainer(model_list, dataInfo = 'Yeast')
# Fit every model in the trainer on the training split.
# Fix: update = TRUE instead of update = T (T is a reassignable binding,
# not a reserved word; always spell out TRUE/FALSE).
t1 <- train(t1, Site ~ ., training, update = TRUE)
# Dispatches to the mtrainer plot method for the fitted models.
plot(t1)
# Summarize the fitted multi-model trainer.
# Fix: the original called summary(s1), but no object named s1 exists
# anywhere in this script — every other chunk operates on t1, so this
# was a typo that would error with "object 's1' not found".
summary(t1)
# Add (re-register) a subset of methods to the trainer and re-fit them
# on the training split.
t1 <- t1 %>%
  addmodel.mtrainer(c('ctree', 'C5.0', 'gbm', 'svmLinear', 'svmRadial',
                      'pls', 'earth', 'avNNet')) %>%
  train(Site ~ ., training)
# Score the held-out set with every base model, then build and evaluate
# the FD ensemble from the prediction matrix.
t1 <- predict(t1, newdata = testing)
#auclist <- apply(t1$predictions, 2, auc.rank, testingY)
fde1 <- fde(t1$predictions)
fde1 <- calculate_performance(fde1, testingY, alpha = 7)
#fde1 <- predict_performance(fde1, auclist, attr(testingY, 'rho'))
#plot_performance(fde1, nsample=100, trendline=F)
# Ensemble performance as a function of how many base methods are
# combined (3 through 15), sampled 100 times per size.
plot_performance_nmethods(fde1, nmethod_list = 3:15, nsample = 100)
# Correlation structure among the base-method predictions, shown
# separately for each class.
plot_cor(fde1, class_flag = 'positive')
plot_cor(fde1, class_flag = 'negative')
# Rebuild the FD ensemble with the true test labels attached, then plot
# the per-method score summary.
fde1 <- fde(t1$predictions, testingY)
plot_single(fde1, 'score')
# Persist the predictions, test labels, and the full trainer object.
# NOTE(review): saveRDS output is conventionally given a '.rds'
# extension; the '.RData' names here still work but are misleading,
# since these files are read back with readRDS(), not load().
store.mtrainer(t1, 'yeast_m8_pre.RData')
saveRDS(testingY, 'yeast_m8_y.RData')
saveRDS(t1, 'yeast_all.RData')
# Ensemble plot with method = 'invauc' (presumably inverse-AUC
# weighting — confirm against the FDclassifieR documentation).
plot_ensemble(fde1, method='invauc', alpha=0.8)
# Binary labels with 'NUC' as the positive class.
y <- to_label(fde1@actual_label, class1 = 'NUC')
# Evaluate the 10th base method alone via pcr, resampling 1000 draws of
# size 100. NOTE(review): pcr here is FDclassifieR's function, not
# pls::pcr — confirm its exact semantics in the package docs.
p1 <- pcr(fde1@predictions[, 10], y, sample_size = 100, sample_n = 1000)
plot(p1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.