Figure 2 - LOL Paper

require(lol)
require(ggplot2)
require(latex2exp)
require(MASS)
require(gridExtra)

n <- 200
d <- 1000
# scatter plot of the first two dimensions of X, colored by class label
plot_sim <- function(X, Y, name) {
  data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
  data$y <- factor(data$y)
  ggplot(data, aes(x=x1, y=x2, color=y)) +
    geom_point() +
    xlab(TeX("$x_1$")) +
    ylab(TeX("$x_2$")) +
    ggtitle(name) +
    labs(color="Class Label") +
    theme_bw()
}

# reduce X to r dimensions with the supplied technique, then plot the
# class-conditional densities of the resulting 1-d LDA projection
run_sim <- function(inps, func, ylab="", title="", r=4) {
  result <- do.call(func, c(inps, list(r=r)))
  # LDA on the embedding can fail (e.g., after CCA) due to collinearity;
  # fall back to the first embedding dimension if it does
  xproj <- tryCatch({
    liney <- lda(result$Xr, inps$Y)
    predict(liney, result$Xr)$x[, 1]
  }, error=function(e) {
    result$Xr[, 1]
  })
  data <- data.frame(x1=xproj, y=inps$Y)
  data$y <- factor(data$y)
  proj_plot <- ggplot(data, aes(x=x1, y=..scaled.., fill=y, linetype=y)) +
    geom_density(adjust=2, alpha=0.6) +
    xlab(TeX("")) +
    ylab(TeX(ylab)) +
    theme_bw() +
    ggtitle(title) +
    scale_fill_discrete(name="Class Posterior") +
    scale_y_continuous(breaks=c(0, 0.5, 1)) +
    guides(linetype=FALSE) #+
    #geom_jitter(data=data, aes(x=x1, y=0, group=y, color=y, shape=y), position = position_jitter(width=0, height=0.2))
  return(proj_plot)
}
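
For example, a single panel could be produced as follows (a sketch that mirrors the calls made in the simulation loop below, and assumes the simulation object exposes X, Y, mus, Sigmas, and priors as used later in this notebook):

example_data <- lol.sims.rtrunk(n, d)  # one draw from the Trunk simulation
example_inps <- list(X=example_data$X, Y=example_data$Y, mus=example_data$mus,
                     Sigmas=example_data$Sigmas, priors=example_data$priors)
run_sim(example_inps, lol.project.lol, ylab="LOL", title="Trunk")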

g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}

In this notebook, we consider 3 simulations with $K=2$ classes: Stacked Cigars, Trunk, and Rotated Trunk. We simulate $n=200$ examples in $d=1000$ dimensions, reduce the dimensionality to $r=4$ using the indicated technique, and then use MASS::lda to estimate the $K-1=1$ dimensional projection that optimizes the class discriminant boundary.
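
To make this concrete, here is a minimal sketch of what run_sim does internally for one simulation and one technique (it assumes, as the code above does, that the lol projection functions return the embedded data in Xr):

sim <- lol.sims.cigar(n, d)                           # simulate (X, Y) for one setting
embedding <- lol.project.lol(X=sim$X, Y=sim$Y, r=4)   # reduce X to r dimensions
discriminant <- lda(embedding$Xr, sim$Y)              # fit LDA on the embedding
scores <- predict(discriminant, embedding$Xr)$x[, 1]  # the K-1 = 1 dimensional projection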

We begin by generating the simulations:

sims <- list(lol.sims.cigar, lol.sims.rtrunk, lol.sims.rtrunk)
sim_names <- c("Stacked Cigars", "Trunk", "Rotated Trunk")
funcs <- list(lol.project.pca, lol.project.cpca, lol.project.lrcca,
              lol.project.lol, lol.project.bayes_optimal)
alg_names <- c("PCA", "PCA'", "CCA", "LOL", "Bayes Optimal")

settings <- list(list(), list(), list(rotate=TRUE))

sim_plots <- list()
res_plots <- list()
counter1 <- 1; counter2 <- 1
for (i in seq_along(settings)) {
  # simulate the i-th setting once, then embed it with each technique
  data <- do.call(sims[[i]], c(list(n, d), settings[[i]]))
  sim_plots[[counter1]] <- plot_sim(data$X, data$Y, sim_names[i])
  for (j in seq_along(funcs)) {
    inps <- list(X=data$X, Y=data$Y, mus=data$mus, Sigmas=data$Sigmas, priors=data$priors)
    res_plots[[counter2]] <- run_sim(inps, funcs[[j]], ylab=alg_names[j],
                                     title=sim_names[i])
    counter2 <- counter2 + 1
  }
  counter1 <- counter1 + 1
}

Below, the top row shows each simulation setup, plotting the first 2 dimensions colored by class. The next 5 rows show the 2-class conditional posteriors obtained by projecting the data onto the LDA-estimated discriminant after reducing dimensionality with the technique indicated in the left-most column:

top_legend <- g_legend(sim_plots[[1]])  # shared legend for the top-most (simulation) row
bottom_legend <- g_legend(res_plots[[length(sim_names) + 1]])  # shared legend for the projection rows
sim_plots <- sapply(sim_plots, function(simp) {
  simp + xlab("") + ylab("") + theme(legend.position="none")
  }, simplify=FALSE)

res_plots <- sapply(seq_along(res_plots), function(j) {
    resp <- res_plots[[j]] + xlab("") + ggtitle("") + theme(legend.position="none")
    # keep the y-axis label (the technique name) only for the left-most column
    if (j > length(alg_names)) {
      resp <- resp + ylab("")
    }
    return(resp)
  }, simplify=FALSE)
sim_grid <- grid.arrange(grobs=sim_plots, ncol=length(sim_names))
res_grid <- grid.arrange(grobs=res_plots, ncol=length(sim_names), as.table=FALSE)

res_bottom <- grid.arrange(res_grid, bottom_legend, nrow=1, widths=c(.88, .12))
res_top <- grid.arrange(sim_grid, top_legend, nrow=1, widths=c(.88, .12))

Finally, we assemble the simulation row and the projection grid into the full figure:

ncol <- length(alg_names) + 1
fig2 <- grid.arrange(res_top, res_bottom, nrow=2,
                     heights=c(.25, .75))

