library(purrr) library(FDclassifieR)
y <- create.labels(N=10000, rho=0.5) table(y)
gs <- create.scores.gaussian(y, auc=0.9) plot.scores(gs,y)
d <- data.table() for (n in c(200, 300, 400, 500, 700, 1000)) { p1 <- pcr(gs, y, sample_size = 100, sample_n = n) res <- check.pcr(p1) res <- t(c(res, sampleSize=100, sampleN=n)) d <- rbind(d, res) } d
check.pcr(p1)
plot(p1)
get_fermi(auc.rank(gs, y), rho=attr(y, 'rho'))
auclist <- create.auclist(0.8, 0.81, 10) data_g <- create_predictions(n=10000, auclist = auclist, p=0.05)
estimated_rank <- ensemble.fermi(data_g$predictions, data_g$actual_labels, debug.flag=T)
plot.scores(data_g$predictions[,10], data_g$actual_labels)
plot.scores(estimated_rank, data_g$actual_labels)
fde1 <- fde(data_g$predictions) fde1 <- calculate_performance(fde1, data_g$actual_labels, alpha=1) #auc.rank(fde1@estimated_prob, data_g$actual_labels)
fde2 <- predict_performance(fde1, auclist, 0.2, alpha=2) print(auc.rank(fde2@estimated_rank, data_g$actual_labels)) auc.rank(fde2@estimated_prob, data_g$actual_labels)
plot(fde1@estimated_rank, fde1@estimated_prob) points(fde2@estimated_rank, fde2@estimated_prob)
plot_single(fde1, 'score')
auclist1 <- generate.auclist(0.6, 0.7, 10) auclist2 <- generate.auclist(0.7, 0.8, 10) auclist3 <- generate.auclist(0.8, 0.9, 10)
rho0 <- 0.75 Mlist <- c(10, 15, 20, 25, 30, 35, 40,45, 50) res1 <- data.frame() for (m in Mlist) { auclist <- generate_auclist(0.5, 0.6, m) glist <- generate.ensemble(auclist, N=1000, rho=rho0) tmp <- unlist(map(glist, auc)) a <- data.frame(M=m, aucSummap=tmp['summap'], aucMin=min(tmp[1:length(auclist)]), aucMean=mean(tmp[1:length(auclist)]), aucMax=max(tmp[1:length(auclist)])) res1 <- rbind(res1, a) } res1$range <- "0.5-0.6" res1$rho <- rho0 #lll <- ensemble.gaussian(glist, view=T) res2 <- data.frame() for (m in Mlist) { auclist <- generate_auclist(0.6, 0.7, m) glist <- generate.ensemble(auclist, N=1000, rho=rho0) tmp <- unlist(map(glist, auc)) a <- data.frame(M=m, aucSummap=tmp['summap'], aucMin=min(tmp[1:length(auclist)]), aucMean=mean(tmp[1:length(auclist)]), aucMax=max(tmp[1:length(auclist)])) res2 <- rbind(res2, a) } res2$range <- "0.6-0.7" res2$rho <- rho0 res3 <- data.frame() for (m in Mlist) { auclist <- generate_auclist(0.7, 0.8, m) glist <- generate.ensemble(auclist, N=1000, rho=rho0) tmp <- unlist(map(glist, auc)) a <- data.frame(M=m, aucSummap=tmp['summap'], aucMin=min(tmp[1:length(auclist)]), aucMean=mean(tmp[1:length(auclist)]), aucMax=max(tmp[1:length(auclist)])) res3 <- rbind(res3, a) } res3$range <- "0.7-0.8" res3$rho <- rho0 res <- rbind(res1, res2, res3)
library(ggplot2) ggplot(data=res) + geom_line(aes(x=M, y=aucSummap, col=range)) + geom_point(aes(x=M, y=aucMean, col=range)) + geom_point(aes(x=M, y=aucMin, col=range)) + geom_point(aes(x=M, y=aucMax, col=range )) + ylim(0.5, 1.0) + ylab('AUC_SUMMA+') ggsave('rho_0.75_sim_m.pdf')
res1
glist <- generate.ensemble(auclist1, N=100, rho=0.48)
ensemble.gaussian(glist, view=T)
library(rfermiclassifier) testsumma <- function(auc0, auc1, method, rho, M) { auclist <- generate.auclist(auc0, auc1, M) summap <- generate.ensemble(auclist, N=1000, rho=rho, method=method) df <- confMatrix(summap$summap) df$method <- paste0('summa', method) df$aucrange <- paste0(auc0, '-', auc1) df$rho <- rho df$M <- M df } testsumma(0.5, 0.6, '', 0.5, 10)
res <- data.frame() for (rho in c(0.25, 0.5, 0.75)) { for (j in (0:3)) { for (m in (1:10)*5) { for (i in 1:10) { a <- testsumma(0.5+j*0.1, 0.6+j*0.1, '+', rho, m) b <- testsumma(0.5+j*0.1, 0.6+j*0.1, '', rho, m) res <- rbind(res, a, b) } } } } res
ggplot(data=res[res$rho == 0.5, ], aes(M, auc, color=method)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(~aucrange) ggsave(filename='sim_MvsAuc_r5.pdf', width=8)
ggplot(data=res[res$rho == 0.75, ], aes(M, specificity, color=method)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(~aucrange) ggsave('sim_MvsSpec_r75.pdf', width=8)
ggplot(data=res[res$rho == 0.75, ], aes(M, baccuracy, color=method)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(~aucrange) ggsave('sim_Mvsbacc_r75.pdf', width=8)
ggplot(data=res) + geom_boxplot(aes(x=aucrange, y=auc, color=method)) + facet_wrap(~rho, scales='free') ggsave('sim_method_comp.pdf', width=8)
ggplot(data=res[res$method == 'summa', ], aes(M, auc, color=aucrange)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(~rho) + ylim(0.5, 1) ggsave('sim_summa.pdf', width=8)
getbauc <- function(x) { if (x == '0.5-0.6') 0.6 else if (x == '0.6-0.7') 0.7 else if (x == '0.7-0.8') 0.8 else 0.9 } res$bauc <- unlist(lapply(res$aucrange, FUN=getbauc)) res$gain <- 100*(res$auc - res$bauc)/res$bauc
ggplot(data=res[res$method == 'summa+', ], aes(M, gain, color=aucrange)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(~rho) + ylab('Gain (%)') + xlab('# of base classifiers') ggsave('sim_summa+_gain.pdf', width=8)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.