require(lol)
require(ggplot2)
require(latex2exp)
require(MASS)
require(gridExtra)
require(data.table)
require(reshape2)
require(R.matlab)
require(scales)

# compute the cutoff for the particular trial to get an approximate elbow
# by computing the smallest r with an associated lhat within 5%
# of the global minimum lhat
compute_cutoff <- function(rs, lhats, t=0.05) {
  sr.ix <- sort(rs, decreasing=FALSE, index.return=TRUE)$ix
  # compute minimum value
  min.lhat <- min(lhats)
  # compute minimum value + 5%
  lhat.thresh <- (1 + t)*min.lhat
  # find which indices are all below this
  lhat.below <- which(lhats <= lhat.thresh)
  rs.below <- rs[lhat.below]; lhats.below <- lhats[lhat.below]
  tmin.ix <- min(rs.below, index.return=TRUE)
  return(list(r=rs.below[tmin.ix], lhat=lhats.below[tmin.ix]))
}
toep <- readMat('./data/fig3/toeplitz.mat')
tr2 <- readMat('./data/fig3/rtrunk.mat')
tr3 <- readMat('./data/fig3/3trunk.mat')
ft <- readMat('./data/fig3/fat_tails.mat')
qd <- readMat('./data/fig3/r2toeplitz.mat')

maxr <- c(90, 30, 30, 30, 30)
minr <- 0
mats <- list(toep, tr2, tr3, ft, qd)
sim.name <- c("Toeplitz", "Trunk-2", "Trunk-3", "Fat-Tails (D=1000)", "QDA")

interest <- list(c("ROAD"), c("ROAD"), c("LASSO"), c("ROAD"), c("ROAD"))
key <- c("ROAD", "lasso")
names(key) <- c("ROAD", "LASSO")


resultsm <- data.frame(sim=c(), iter=c(), alg=c(), r=c(), lhat=c())

for (k in 1:length(mats)) { 
  dat <- mats[[k]]
  desired_r <- 1:maxr[k]
  for (i in 1:length(dat$ks)) {  # i encodes simulation iteration
    for (j in length(interest[[k]])) {
      algname <- key[interest[[k]][j]]
      algid <- which(dimnames(dat$ks[[i]][[1]])[[1]] == algname)
      rs <- dat$ks[[i]][[1]][algid,,1][[algname]]
      algid <- which(dimnames(dat$Lhat)[[1]] == algname)
      lhats <- dat$Lhat[algid,,][[i]]
      lhat_adjust <- spline(rs, lhats, xout=desired_r, method='fmm', ties=mean)
      resultsm <- rbind(resultsm, data.frame(sim=sim.name[k], iter=i, alg=interest[[k]][j],
                                             r=lhat_adjust$x, lhat=lhat_adjust$y))
    }
  }
}
maxr <- c(30, 90, 30, 30, 30)
ds <- c(100, 100, 100, 1000, 100)
# additional arguments for each simulation scenario
opt_args <- list(list(), list(), list(K=3), list(rotate=TRUE), list())
dat.names = c("Trunk-2", "Toeplitz", "Trunk-3", "Fat-Tails (D=1000)", "QDA")
dat.abbrs <- c("T", "Z", "3", "F", "Q")
names(dat.abbrs) <- dat.names

# read the results in
results <- readRDS('./data/fig3/lol_fig3_lda.rds')
results <- rbind(results$overall, resultsm)
#results <- results$overall
nan.mean <- function(x) mean(x, na.rm=TRUE)
results.means <- aggregate(lhat ~ sim + alg + r + lhat, data = results, FUN = nan.mean)

acols <- c("#008000", "#4daf4a", "#e41a1c", "#ff7f00", "#377eb8", "#f781bf", "#00ffff")
algs <- c("LOL", "QOQ", "CCA", "ROAD", "LASSO", "PCA", "cPCA")
names(acols) <- algs
plot.results <- data.frame(r=c(), lhat=c(), symbol=c(), alg=c())
for (i in 1:length(dat.names)) {
  for (j in 1:length(algs)) {
    alg <- algs[j]
    ss <- results.means[results.means$sim == dat.names[i] & results.means$alg == algs[j],]
    rs <- ss$r; lhats <- ss$lhat
    min.result <- compute_cutoff(rs, lhats)
    r.min <- min.result$r; lhat.min <- min.result$lhat
    if (alg == 'LOL') {
      norm.r <- r.min
      norm.lhat <- lhat.min
    }
    plot.results <- rbind(plot.results, data.frame(r=r.min/norm.r, lhat=lhat.min/norm.lhat,
                                                   sim=dat.names[i], alg=alg))
  }
}
box <- data.frame(x=c(.1, 1, 1, .1), y=c(.1, .1, 1, 1))
ggplot(plot.results, aes(x=r, y=lhat, shape=sim, color=alg)) +
  geom_polygon(data=box, aes(x=x, y=y), fill='gray', color='gray') +
  geom_point(size=3) +
  scale_color_manual(values=acols) +
  ylab("Normalized Misclassification Rate") +
  xlab("Normalized Embedding Dimension") +
  labs(shape="Data", color="Algorithm") +
  ggtitle("Comparison of Embedding Techniques to LOL") +
  scale_y_continuous(trans=log10_trans(), limits=c(.1, 10)) +
  scale_x_continuous(trans=log10_trans(), limits=c(.1, 10)) +
  theme_bw()


neurodata/lol documentation built on March 3, 2021, 1:46 a.m.