require(lolR) require(ggplot2) require(latex2exp) require(MASS) require(gridExtra) require(data.table) require(reshape2) require(R.matlab) require(grid) require(plyr) require(gtable) require(tidyverse) require(ggplotify) classifier.alg = "lda" # compute the cutoff for the particular trial to get an approximate elbow # by computing the smallest r with an associated lhat within 5% # of the global minimum lhat w=.8 h=.2 mcols <- c("#808080", "#EE7600", "#0e3ec1","#469990") #"#8B4513") names(mcols) <- c("outlier", "1", "2", "3") g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend) }
The below code will produce the required data, which runs LOL, cPCA, PCA, and LR-CCA at the desired simulation settings. Note that this function will multi-thread natively, and took approximately 7 hours to run on a 96 core machine with $\frac{7}{8}$ of the cores active.
source('./sims_driver.R')
First, we prepare the plots of subsets of the mean and covariance matrices:
# run the simulations once to obtain some basic visualizations n=100 # the simulations to call themselves sims <- list(lol.sims.rtrunk, lol.sims.rev_rtrunk, lol.sims.cross) ds <- c(100, 100, 100) # additional arguments for each simulation scenario opt_args <- list(list(K=3), list(robust=n*.4, maxvar.outlier=1000), list()) sim_names = c("Trunk-3", "Robust", "Cross") sim_titles = c("(A)", "(B)", "(C)") counter <- 1 dims <- list(c(1, 2), c(2, 50), c(50, 51)) sim.dims <- list() sim.dat <- do.call(rbind, lapply(1:length(sims), function(i) { simn <- do.call(sims[[i]], c(list(n, ds[i]), opt_args[[i]])) if(is.null(simn$robust)) { outlier <- FALSE } else { outlier <- !simn$robust$inlier } if (sim_names[i] == "Cross") { X1 <- simn$X[,ds[i]/2]; X2 <- simn$X[,ds[i]/2 + 1] } else { X1 <- simn$X[,1]; X2 <- simn$X[,2] } return(data.frame(X1=X1, X2=X2, Y=simn$Y, sim=sim_names[i], outlier=outlier)) })) %>% mutate(sim=recode_factor(sim, "Trunk-3"="(A) Trunk-3", "Robust"="(B) Robust", "Cross"="(C) Cross"), sim=factor(sim, levels=c("(A) Trunk-3", "(B) Robust", "(C) Cross"), ordered=TRUE)) sim_plt <- sim.dat %>% mutate(Class=ifelse(outlier, "outlier", Y)) %>% group_by(sim) %>% mutate(X1=(X1-min(X1))/(max(X1) - min(X1)), X2=(X2-min(X2))/(max(X2)-min(X2))) %>% ungroup() %>% ggplot(aes(x=X1, y=X2, color=Class)) + geom_point(size=2) + facet_grid(.~sim) + scale_color_manual(values=c("1"="blue", "2"="darkorange", "3"="cyan", "outlier"="darkgray")) + ylab("Simulated Data") + xlab("") + theme_bw() + theme(text=element_text(size=20), axis.ticks.x=element_blank(), axis.ticks.y=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank())
Next, we aggregate over the respective iterations, and subset plots for each function:
# read the results in results <- readRDS(paste('../data/sims/lol_sims_', classifier.alg, '.rds', sep="")) %>% group_by(sim, alg, r) %>% summarize(lhat=mean(lhat)) %>% mutate(best.lhat=min(lhat), best.r=r[lhat == best.lhat], lhat.thresh=best.lhat*1.05, lhat.beats=lhat <= best.lhat, r.star=min(r[lhat.beats])) %>% mutate(alg=factor(recode_factor(alg, "QOL"="QOQ", "LRLDA"="rrLDA"), levels=c("PCA", "rrLDA", "CCA", "RP", "PLS", "LOL", "QOQ", "RLOL"), ordered=TRUE)) %>% filter(!((alg %in% c("RLOL")) & (sim == "Cross"))) %>% filter(!((alg %in% c("QOQ")) & (sim %in% c("Trunk-3", "Robust")))) %>% mutate(sim=recode_factor(sim, "Trunk-3"="(A) Trunk-3", "Robust"="(B) Robust", "Cross"="(C) Cross"), sim=factor(sim, levels=c("(A) Trunk-3", "(B) Robust", "(C) Cross"), ordered=TRUE)) results.star <- results %>% filter(r == r.star) algs <- c("LOL", "RLOL", "QOQ", "PLS", "CCA", "rrLDA", "PCA", "RP", "ROAD", "LASSO") acols <- c("#f41711", "#f41711", "#f41711", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#a65628", "#99b4c6", "#666666") linestyle <- c(1,2,3,1,1,1,1,1,1, 1) names(acols) <- algs names(linestyle) <- algs grid_sim <- results %>% ggplot(aes(x=r, y=lhat, color=alg)) + geom_line(aes(linetype=alg), size=1.2) + geom_point(data=results.star, size=4, shape=18) + scale_linetype_manual(values=linestyle, name="Algorithm") + scale_color_manual(values=acols, name="Algorithm") + facet_grid(.~sim) + xlab("Number of Embedding Dimensions") + ylab("Error") + guides(color=guide_legend(ncol=2), linetype=guide_legend(ncol=2)) + theme_bw() + theme(text=element_text(size=20), strip.text.x=element_blank(), strip.background=element_blank()) lhat_plt <- as.ggplot(gtable_filter(ggplotGrob(grid_sim), "axis-b-[2,3]", trim=FALSE, invert=TRUE))
We combine and plot:
grid.arrange(sim_plt, lhat_plt, nrow=2, heights=c(0.35, 0.35))
plt.multiclass <- readRDS('../data/sims/lol_results_khump.rds') %>% group_by(K, r, alg) %>% summarize(lhat=mean(lhat)) %>% mutate(lhat.chance=1 - 1/K, K=paste0("K=", K), kappa=(lhat.chance - lhat)/(lhat.chance), K=factor(K, levels=c("K=2", "K=4", "K=6", "K=8", "K=10"), ordered=TRUE)) %>% mutate(alg=recode_factor(alg, "LRLDA"="rrLDA")) %>% ggplot(aes(x=r, y=kappa, color=alg)) + geom_line(size=1.2) + facet_grid(.~K) + xlab("Number of Embedding Dimensions") + ylab("Cohen's Kappa") + ggtitle("Performance of Embedding Strategies on Multiclass Problems") + theme(font=element_text(size=20)) + theme_bw() + scale_linetype_manual(values=linestyle, name="Algorithm") + scale_color_manual(values=acols, name="Algorithm") + theme(text=element_text(size=20)) plt.multiclass <- as.ggplot(gtable_filter(ggplotGrob(plt.multiclass), "axis-b-[2,3,4,5]", trim=FALSE, invert=TRUE)) plt.multiclass
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.