knitr::opts_chunk$set(echo = TRUE)
library(FiDEL)
library(latex2exp)
library(ggpubr)
library(tictoc)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

Generate sample classifer

# generate labels
y <- create.labels(N = 100000, rho=0.2)

# generate scores with a specific AUC from gaussian distribution
gs <- create.scores.gaussian(y, auc=0.9, tol = 0.0001)
ds <- build_curve(gs, y)
g1 <- plot.scores(gs, y)
g1
g2 <- plot.curves(ds, filename='results/curves.png')
g2

Sampling for class probability at given rank

# N is a maximum rank, M is the number of sampling
library(tictoc)
tic()
pcr1 <- pcr(gs, y, sample_size=1000, sample_n=300)
#x <- cal_pcr(gs, y, N=4020, M=500)
toc()
#plot(pcr1)
head(pcr1)

Calculation AUC

To check $\Delta = - $, we can calculate $$ and $$ seperately.

rankprob <- pcr1

print(sum(rankprob$rank * rankprob$prob)/sum(rankprob$prob))
print(sum(rankprob$rank * (1 - rankprob$prob)/sum(1 - rankprob$prob)))
sigma.rank(rankprob, debug.flag = T)
auc.rank(pcr1)
auprc.rank(pcr1)
rankprob <- cal.fromRank(rankprob)
plot.curves(pcr1)

Calculate Pxy by sampling two ranks from class 1 and class 2

auc.Pxysample(pcr1, iter = 10000)

Calculate Pxy using summation over all possible conditions.

auc.Pxysum(pcr1, debug.flag = T)
library(pROC)
roc_test <- roc(y, gs)
auc(roc_test)
plot(roc_test, xlim=c(1,0), ylim=c(0,1))
plot.scores(gc)

Confidence Interval

Pxxy.sample(rankprob, iter=5000, debug.flag=T)
Pxxy.sum(rankprob)
Pxxy.sum2(rankprob)
b <- attr(pcr1, 'beta')*attr(pcr1, 'N')
m <- attr(pcr1, 'mu')/attr(pcr1, 'N')
Pxxy_int(b, m, attr(pcr1, 'rho'))
Pxyy.sample(rankprob, iter=5000, debug.flag=T)
Pxyy.sum(rankprob)
Pxyy.sum2(rankprob)
b <- attr(pcr1, 'beta')*attr(pcr1, 'N')
m <- attr(pcr1, 'mu')/attr(pcr1, 'N')
Pxyy_int(b, m, attr(pcr1, 'rho'))
var.auc(pcr1, debug.flag = T)
ci(roc_test)
var_auc_fermi(0.55, 0.5, N=1000, method='sampling', debug.flag = T)
var_auc_fermi(0.55, 0.5, N=1000, method='integral', debug.flag = T)
cat('Pxxy: ', Pxxy_int(2.6429, -0.4156, 0.1))
cat('Pxyy: ', Pxyy_int(2.6429, -0.4156, 0.1))
auc
var_auc_fermi(0.895, 283, 25374, iter=8000, debug.flag = T)
var_auc_fermi(0.942, 780, 67228, iter=10000, debug.flag = T)

Sampling Check

N0list <- c(1000, 10000, 100000)
sampleratios <- c(0.2, 0.4, 0.6, 0.8)
Mlist <- c(100, 200, 300, 400)
rholist <- c(0.1, 0.3, 0.5, 0.7, 0.9)
auclist <- c(0.6, 0.7, 0.8, 0.9)
a0 <- 0.8
res <- data.table()

for (N0 in N0list) {
  for(r in rholist) {
    for (a in auclist) {
      y <- create.labels(N=N0, rho=r)
      gs <- create.scores.gaussian(y, auc=a)
      for (rs in sampleratios) {
        Ns <- floor(N0*rs)
        for (M in Mlist) {
          pcr1 <- pcr(gs, y, sample_size=Ns, sample_n=M)
          res <- rbind(res, get_pcr(pcr1))
          rm(pcr1)
          pcr1 <- pcr(gs, y, sample_size=Ns, sample_n=M)
          res <- rbind(res, get_pcr(pcr1))
          rm(pcr1)
          pcr1 <- pcr(gs, y, sample_size=Ns, sample_n=M)
          res <- rbind(res, get_pcr(pcr1))
          rm(pcr1)
        }
      }
    }
  }
}
library(tibble)
res <- as_tibble(res)
res
library(ggplot2)
library(dplyr)
res$NN0 <- res$N/res$N0

g1 <- res %>% 
  ggplot(aes(x=N0, y=auc_pcr/auc0)) + geom_point(aes(size=rho, color=auc0)) + 
  theme_classic() + xlab('N.data') + ylab('AUC_p/AUC_data') + geom_hline(yintercept = 1, linetype='dashed') +
  scale_x_continuous(trans='log10') + geom_jitter(height = 0.0001, width = 0.1) +
  facet_wrap(vars(NN0, M), labeller="label_both", ncol=4, scales="free")
ggsave('R-N0-convergence.pdf', width=14, height=14)
g1
g2 <- ggplot(res, aes(x=M, y=auc_pcr/auc0)) + geom_point(aes(color=auc0, size=rho)) +
  theme_classic() + xlab('M') + ylab('AUC_p/AUC_data') + geom_hline(yintercept = 1.0, linetype='dashed') +
  geom_jitter(height = 0.0001, width = 10) + 
  facet_wrap(vars(NN0, N0), labeller="label_both", ncol=3, scales="free")
ggsave('R-M-convergence.pdf', width=14, height=14)
g2
g3 <- ggplot(res, aes(x=rho, y=auc_pcr/auc0)) + geom_point(aes(color=auc0, size=N0)) +
  theme_classic() + xlab('rho') + ylab('AUC_p/AUC_data') + geom_hline(yintercept = 1.0, linetype='dashed') +
  geom_jitter(height = 0.0001, width = 0.02) +
  facet_wrap(vars(NN0, M), labeller="label_both", ncol=4, scales="free")
ggsave('R-rho-convergence.pdf', width=14, height = 14)
g3
g4 <- ggplot(res, aes(x=auc0, y=auc_pcr/auc0)) + geom_point(aes(color=M, size=rho)) +
  theme_classic() + xlab('AUC') + ylab('AUC_p/AUC_data') + geom_hline(yintercept = 1.0, linetype='dashed') +
  geom_jitter(height = 0.0001, width = 0.01) +
  facet_wrap(vars(NN0, M), labeller="label_both", ncol=4, scales="free")
ggsave('R-auc-convergence.pdf', width=14, height = 14)
g4
library(ggplot2)
library(dplyr)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
#scale_colour_manual(values=cbPalette)

res$NN0 <- res$N/res$N0

g1 <- res %>% 
  ggplot(aes(x=N0, y=auc_pcr/auc0)) + geom_point() + 
  theme_classic() + xlab('N.data') + ylab('AUC_p/AUC_data') + 
  geom_hline(yintercept = 1, linetype='dashed') +
  scale_x_continuous(trans='log10') +
  stat_summary(fun.data = "median_hilow", geom="crossbar", width=0.1, color=cbPalette[3])
#ggsave('R-N0-convergence.pdf', width=14, height=14)
g1
g2 <- ggplot(res, aes(x=M, y=auc_pcr/auc0)) + geom_point(aes(color=N/N0)) +
  theme_classic() + xlab('M') + ylab('AUC_p/AUC_data') + 
  geom_hline(yintercept = 1.0, linetype='dashed') +
  theme(legend.position = c(0.75, 0.9), legend.direction = "horizontal") +
  stat_summary(fun.data = "median_hilow", geom="crossbar",width = 20, color=cbPalette[3])
g2
g3 <- ggplot(res, aes(x=rho, y=auc_pcr/auc0)) + geom_point() +
  theme_classic() + xlab('rho') + ylab('AUC_p/AUC_data') + geom_hline(yintercept = 1.0, linetype='dashed') +
  stat_summary(fun.data = "median_hilow", geom="crossbar",width = .04, color=cbPalette[3])
g3
g4 <- res %>% 
  ggplot(aes(x=N, y=auc_pcr/auc0)) + geom_point(aes(color=N/N0)) +
  theme_classic() + xlab('N') + ylab('AUC_p/AUC_data') + 
  geom_hline(yintercept = 1.0, linetype='dashed') +
  scale_x_continuous(trans='log10') + 
  theme(legend.position = c(0.75, 0.9), legend.direction = "horizontal") +
  stat_summary(fun.data = "median_hilow", geom="crossbar", width = 0.1, color=cbPalette[3])
g4
library(ggpubr)

g <- ggarrange(g4, g2, labels=c("B", "C"), ncol=2, nrow=1)
ggsave("converge_relation_auc.pdf", width=9, height=4, dpi=300)
g

Check r* and max(bac(r))

tic()
rholist <- c(0.1, 0.3, 0.5, 0.7, 0.9)
auclist <- create.auclist(0.6, 0.98, 10)

res <- data.table()
N0 <- 50000

for(r in rholist) {
  for (a in auclist) {
    y <- create.labels(N=N0, rho=r)
    gs <- create.scores.gaussian(y, auc=a)

    ds <- build_curve(gs, y)
    ds_roc <- roc(y, gs)
    ds_ci <- ci(ds_roc)
    info <- attr(ds, 'info')
    #print(info)

    tmp <- data.table(N=length(y), rho=attr(y,'rho'), auc=a, auc_bac=info$auc_bac,
             auprc=info$auprc, th_bac=info$th_bac, rstar=info$rstar, 
             pxxy=info$Pxxy, pxyy=info$Pxyy, sig_auc=sqrt(info$var_auc), 
             sig_auc_delong=(ds_ci[2]-ds_ci[1])/3.92)
    res <- rbind(res, tmp)
    rm(ds)
    rm(ds_roc)
  }
}
toc()

res
tmp <- res
tmp$p <- as.factor(tmp$rho)
g1 <- ggplot(data=tmp) + geom_line(aes(x=rstar, y=th_bac, group=p, color=p)) +
  geom_point(aes(x=rstar, y=th_bac, group=p, color=p)) + 
  geom_abline(slope=1, linetype='dashed') + theme_classic() +
  xlab(TeX('r^*/N')) + ylab(TeX('r_{bac}/N')) + 
  theme(legend.position = c(0.15, 0.7), 
        legend.background = element_rect(color=cbPalette[1], linetype='solid'))
g1
g1 <- ggplot(data=tmp) +
  geom_point(aes(y=rstar, x=th_bac)) + 
  geom_abline(slope=1, linetype='dashed') + theme_classic() +
  ylab(TeX('r_{FD}/N')) + xlab(TeX('r_{bac}/N'))

g1
g2 <- ggplot(data=tmp) + 
  geom_line(aes(x=auc, y=nor, group=rho, color=rho)) +
  geom_point(aes(x=auc, y=nor, group=rho, color=rho)) + 
  geom_hline(yintercept =1, linetype='dashed') + theme_classic() +
  xlab('AUC') + ylab(TeX('r_{bac}/r^*')) + theme(legend.position='none')
g2
g <- ggarrange(g1, g4, g2, g3, labels=c("A", "C", "B", "D"), ncol=2, nrow=2)
#ggsave('threshold-2.pdf', width=7, height=6.5)
print(g)
library(plotly)
res$nor <- res$th_bac/res$rstar

fig <- plot_ly(res, x=~auc, y=~rho, z=~nor, marker = list(color = ~rho, colorscale = c('#FFE1A1', '#683531'), showscale = TRUE), type='mesh3d')
fig <- fig %>% add_markers()

fig
y <- create.labels(N=10000, rho=0.9)
gs <- create.scores.gaussian(y, auc=0.95)

ds <- build_curve(gs, y)
g3 <- plot.curves(ds, type = 3)
g3
y <- create.labels(N=10000, rho=0.1)
gs <- create.scores.gaussian(y, auc=0.95)

ds <- build_curve(gs, y)
g4 <- plot.curves(ds, type = 3)
g4
tmp <- res
tmp$p <- as.factor(tmp$rho)
tmp$sig_auc_delong <- tmp$sig_auc_delong*1.96
g1 <- ggplot(data=tmp) + geom_line(aes(x=sig_auc_delong, y=sig_auc, group=p, color=p)) +
  geom_point(aes(x=sig_auc_delong, y=sig_auc, group=p, color=p)) + 
  geom_abline(slope=1, linetype='dashed') + theme_classic() + 
  xlab(TeX('$\\sigma_{AUC}$ (Delong)')) + ylab(TeX('$\\sigma_{AUC}$ (FD)')) + 
  theme(legend.position='none') +
  xlim(c(0,0.0035)) + ylim(c(0,0.0035))
g1
tmp <- res
tmp$p <- as.factor(tmp$rho)
tmp$sig_auc_delong <- tmp$sig_auc_delong*1.96
gb2 <- ggplot(data=tmp) + 
  geom_point(aes(x=sig_auc_delong, y=sig_auc)) + 
  geom_abline(slope=1, linetype='dashed') + theme_classic() + 
  xlab(TeX('$\\sigma_{AUC}$ (Delong)')) + ylab(TeX('$\\sigma_{AUC}$ (FD)')) + 
  theme(legend.position='none')
#xlim(c(0,0.0035)) + ylim(c(0,0.0035))
gb2
tmp$sig_nor <- tmp$sig_auc/tmp$sig_auc_delong
g2 <- ggplot(data=tmp) + 
  geom_line(aes(x=auc, y=sig_nor, group=p, color=p)) +
  geom_point(aes(x=auc, y=sig_nor, group=p, color=p)) + 
  geom_hline(yintercept = 1, linetype='dashed') + theme_classic() +
  xlab('AUC') + ylab(TeX('$\\sigma_{FD} / \\sigma_{Delong}$')) +
  theme(legend.position = c(0.15, 0.35), 
        legend.background = element_rect(color=cbPalette[1], linetype='solid'))

g2
min_pxxy <- min(tmp$pxxy)
g3 <- ggplot(data=tmp) + theme_classic() +
  geom_line(aes(x=auc, y=pxxy, group=p, color=p)) +
  geom_point(aes(x=auc, y=pxxy, group=p, color=p)) + 
  xlab(TeX('P_{xy}')) + ylab(TeX('P_{xxy}')) +
  geom_abline(slope = 1, linetype='dashed') +
  xlim(c(min_pxxy,1)) + ylim(c(min_pxxy,1)) +
  theme(legend.position='none')
g3
min_pxyy <- min(tmp$pxyy)
g4 <- ggplot(data=tmp) + theme_classic() +
  geom_line(aes(x=auc, y=pxyy, group=p, color=p)) +
  geom_point(aes(x=auc, y=pxyy, group=p, color=p)) + 
  xlab(TeX('P_{xy}')) + ylab(TeX('P_{xyy}')) +
  geom_abline(slope = 1, linetype='dashed') +
  xlim(c(min_pxyy,1)) + ylim(c(min_pxyy,1)) +
  theme(legend.position='none')
g4
g <- ggarrange(g1, g3, g2, g4, labels = c('A', 'C', 'B', 'D'), ncol = 2, nrow = 2)
#ggsave("ci_total.pdf", width=7, height=6.5)
print(g)
library(ggpubr)
g <- ggarrange(g1, gb2, labels=c('A', 'B'), ncol=2, nrow=1)
#ggsave("Figure3v4.pdf", width=7, height=3.5)
print(g)


sungcheolkim78/FiDEL documentation built on Nov. 13, 2024, 7:58 a.m.