require(lolR) require(ggplot2) require(latex2exp) require(MASS) require(gridExtra) require(data.table) require(reshape2) require(R.matlab) require(grid) require(plyr) classifier.alg <- "rf" # compute the cutoff for the particular trial to get an approximate elbow # by computing the smallest r with an associated lhat within 5% # of the global minimum lhat compute_cutoff <- function(rs, lhats, t=0.05) { sr.ix <- sort(rs, decreasing=FALSE, index.return=TRUE)$ix # compute minimum value min.lhat <- min(lhats) # compute minimum value + 5% lhat.thresh <- (1 + t)*min.lhat # find which indices are all below this lhat.below <- which(lhats <= lhat.thresh) rs.below <- rs[lhat.below]; lhats.below <- lhats[lhat.below] tmin.ix <- min(rs.below, index.return=TRUE) return(list(r=rs.below[tmin.ix], lhat=lhats.below[tmin.ix])) } plot_sim_lhats <- function(data, cols, pt.dat, linetype, title="", by=10, from=10, ylab=TeX("$\\hat{L}$"), xlab="Embedded Dimensions", fsize=12) { lims <- c(floor(10*min(data$lhat))/10, ceiling(10*max(data$lhat))/10) if (unique(data$sim)[1] == "Toeplitz") { length.out=4 } else { length.out=3 } breaks = unique(round(seq(from=lims[1], to=lims[2], length.out = length.out), digits=1)) xlims <- c(min(data$r), max(data$r)) xbreaks <- seq(from=from, to=xlims[2], by=by) plot_sims <- ggplot(data, aes(x=r, y=lhat, linetype=alg, color=alg)) + geom_line(size=.95) + scale_color_manual(values=cols, limits=names(cols), guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") + scale_linetype_manual(values=linetype, limits=names(cols), guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") + geom_point(data=pt.dat, aes(x=r, y=lhat, linetype=alg, color=alg), size=2) + #geom_line(data=base::subset(data, alg == "CCA"), aes(x=r, y=lhat, group=alg, linetype color=alg), size=.75) + #geom_point(data=base::subset(pt.dat, alg == "CCA"), aes(x=r, y=lhat, group=alg, color=alg), size=2) + #geom_line(data=base::subset(data, alg != "CCA" & alg != "QOQ"), aes(x=r, y=lhat, group=alg, color=alg), size=.75) + #geom_point(data=base::subset(pt.dat, alg != "CCA"), aes(x=r, y=lhat, group=alg, color=alg), size=2) + #geom_line(data=base::subset(data, alg == "QOQ"), aes(x=r, y=lhat, group=alg, color=alg), linetype="dashed", size=.75) + xlab(xlab) + ylab(ylab) + ggtitle(title) + theme_bw() + scale_y_continuous(limits=lims, breaks=breaks) + scale_x_continuous(limits=xlims, breaks=xbreaks) + theme(plot.margin = unit(c(h,w,h,h), "cm")) + theme(legend.position="bottom", axis.title.y=element_text(size=fsize)) return(plot_sims) } g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend) }
toep <- readMat('./data/fig3/toeplitz.mat') tr2 <- readMat('./data/fig3/rtrunk.mat') tr3 <- readMat('./data/fig3/3trunk.mat') ft <- readMat('./data/fig3/fat_tails.mat') qd <- readMat('./data/fig3/r2toeplitz.mat') maxr <- c(90, 30, 30, 30, 30) minr <- 0 mats <- list(toep, tr2, tr3, ft, qd) sim_name <- c("Toeplitz", "Trunk-2", "Trunk-3", "Fat-Tails (D=1000)", "QDA") interest <- list(c("ROAD"), c("ROAD"), c("LASSO"), c("ROAD"), c("ROAD")) key <- c("ROAD", "lasso") names(key) <- c("ROAD", "LASSO") resultsm <- data.frame(sim=c(), iter=c(), alg=c(), r=c(), lhat=c()) for (k in 1:length(mats)) { dat <- mats[[k]] desired_r <- 1:maxr[k] for (i in 1:length(dat$ks)) { # i encodes simulation iteration for (j in length(interest[[k]])) { algname <- key[interest[[k]][j]] algid <- which(dimnames(dat$ks[[i]][[1]])[[1]] == algname) rs <- dat$ks[[i]][[1]][algid,,1][[algname]] algid <- which(dimnames(dat$Lhat)[[1]] == algname) lhats <- dat$Lhat[algid,,][[i]] lhat_adjust <- spline(rs, lhats, xout=desired_r, method='fmm', ties=mean) resultsm <- rbind(resultsm, data.frame(sim=sim_name[k], iter=i, alg=interest[[k]][j], r=lhat_adjust$x, lhat=lhat_adjust$y)) } } }
# run the simulations once to obtain some basic visualizations n=100 # the simulations to call themselves sims <- list(lol.sims.rtrunk, lol.sims.toep, lol.sims.rtrunk, lol.sims.fat_tails, lol.sims.qdtoep) maxr <- c(30, 90, 30, 30, 30) ds <- c(100, 100, 100, 1000, 100) # additional arguments for each simulation scenario opt_args <- list(list(), list(), list(K=3), list(rotate=TRUE), list()) sim_names = c("Trunk-2", "Toeplitz", "Trunk-3", "Fat-Tails (D=1000)", "QDA") sim_titles = c("(A)", "(B)", "(C)", "(D)", "(E)") ndim <- c(10, 10, 10, 10, 10) sim_min <- c(10, 30, 10, 10, 10) by <- c(10, 30, 10, 10, 10) cov_plots <- list() mean_plots <- list() counter <- 1
# read the results in results.means <- readRDS(paste('./data/fig3/lol_fig3_', classifier.alg, '.rds', sep="")) results <- rbind(results$overall[, colnames(results$overall) != 'se'], resultsm) #results <- results$overall nan.mean <- function(x) mean(x, na.rm=TRUE) results$overall <- results[complete.cases(results),] results.means <- aggregate(lhat ~ sim + alg + r + lhat, data = results, FUN = nan.mean) results.means <- results.means[complete.cases(results.means),] algs <- c("LOL", "QOQ", "ROAD", "LASSO", "PLS", "CCA", "PCA", "LDA", "RP") acols <- c("#00FF00", "#00FF00", "#AAAAAA", "#AAAAAA", "#AAAAAA", "#666666", "#666666", "#666666", "#000000") names(algs) <- acols shapes <- c(21, 24, 21, 24, 23, 21, 24, 23, 21) names(shapes) <- algs sim_plots <- list() results.means$alg <- revalue(results.means$alg, c("cPCA"="LDA"))
plot.results <- data.frame(r=c(), lhat=c(), sim=c(), alg=c()) for (i in 1:length(dat.names)) { for (j in 1:length(algs)) { alg <- algs[j] ss <- results.means[results.means$sim == dat.names[i] & results.means$alg == algs[j],] rs <- ss$r; lhats <- ss$lhat min.result <- compute_cutoff(rs, lhats) r.min <- min.result$r; lhat.min <- min.result$lhat if (alg == 'LOL') { norm.r <- r.min norm.lhat <- lhat.min } plot.results <- rbind(plot.results, data.frame(r=r.min/norm.r, lhat=lhat.min/norm.lhat, sim=titles[i], alg=alg)) } }
box <- data.frame(x=c(.1, 1, 1, .1), y=c(.1, .1, 1, 1)) panela <- ggplot(plot.results, aes(x=r, y=lhat)) + geom_polygon(data=box, aes(x=x, y=y), fill='red', alpha=0.15) + geom_point(aes(x=r, y=lhat, shape=alg, fill=alg), alpha=0.8, color='black', size=3) + scale_fill_manual(values=acols, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + scale_shape_manual(values=shapes, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + ylab("Normalized Misclassification Rate") + xlab("Normalized Embedding Dimension") + labs(shape="Simulation", color="Algorithm") + ggtitle("Simulated Data Performance") + scale_y_continuous(trans=log10_trans(), limits=c(.1, 10)) + scale_x_continuous(trans=log10_trans(), limits=c(.1, 10)) + theme_bw() leg_panela <- g_legend(panela) panela <- panela + theme(legend.position=NaN) overall_leg <- ggplot(plot.results, aes(x=r, y=lhat, fill=alg)) + geom_polygon(data=box, aes(x=x, y=y), fill='red', alpha=0.2) + geom_point(aes(x=r, y=lhat, shape=alg, fill=alg), alpha=0.8, color='black', size=3) + scale_fill_manual(values=acols, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + scale_shape_manual(values=shapes, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + ylab("Normalized Misclassification Rate") + xlab("Normalized Embedding Dimension") + labs(color="Algorithm", shape="Experiment") + ggtitle("Comparison of Embedding Techniques to LOL") + scale_y_continuous(trans=log10_trans(), limits=c(.1, 10)) + scale_x_continuous(trans=log10_trans(), limits=c(.1, 10)) + theme_bw()
require(lolR) require(ggplot2) require(latex2exp) require(MASS) require(gridExtra) require(data.table) require(reshape2) require(grid) require(plyr) require(slbR) # compute the cutoff for the particular trial to get an approximate elbow # by computing the smallest r with an associated lhat within 5% # of the global minimum lhat compute_cutoff <- function(rs, lhats, t=0.05) { sr.ix <- sort(rs, decreasing=FALSE, index.return=TRUE)$ix # compute minimum value min.lhat <- min(lhats) # compute minimum value + 5% lhat.thresh <- (1 + t)*min.lhat # find which indices are all below this lhat.below <- which(lhats <= lhat.thresh) rs.below <- rs[lhat.below]; lhats.below <- lhats[lhat.below] tmin.ix <- min(rs.below, index.return=TRUE) return(list(r=rs.below[tmin.ix], lhat=lhats.below[tmin.ix])) } plot_sim_lhats <- function(data, cols, pt.dat, title="", by=10, from=10, ylab=TeX("$\\hat{L}$"), xlab="Embedded Dimensions", fsize=12) { lims <- c(floor(10*min(data$lhat))/10, ceiling(10*max(data$lhat))/10) breaks= seq(from=lims[1], to=lims[2], by=0.1) xlims <- c(min(data$r), max(data$r)) xbreaks <- seq(from=from, to=xlims[2], by=by) plot_sims <- ggplot(data, aes(x=r, y=lhat, group=alg, color=alg)) + geom_line(data=base::subset(data, alg == "CCA"), aes(x=r, y=lhat, group=alg, color=alg), size=.75) + geom_point(data=base::subset(pt.dat, alg == "CCA"), aes(x=r, y=lhat, group=alg, color=alg), size=2) + geom_line(data=base::subset(data, alg != "CCA" & alg != "QOQ"), aes(x=r, y=lhat, group=alg, color=alg), size=.75) + geom_point(data=base::subset(pt.dat, alg != "CCA"), aes(x=r, y=lhat, group=alg, color=alg), size=2) + geom_line(data=base::subset(data, alg == "QOQ"), aes(x=r, y=lhat, group=alg, color=alg), linetype="dashed", size=.75) + xlab(xlab) + ylab(ylab) + ggtitle(title) + theme_bw() + scale_y_continuous(limits=lims, breaks=breaks) + scale_x_continuous(limits=xlims, breaks=xbreaks) + scale_color_manual(values=cols, limits=names(cols), guide=guide_legend(ncol=1)) +#, title.position="top", title.hjust = .5)) + labs(color="Algorithm", linetype="Test") return(plot_sims) } g_legend<-function(a.gplot){ tmp <- ggplot_gtable(ggplot_build(a.gplot)) leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box") legend <- tmp$grobs[[leg]] return(legend) }
The below code will produce the required data, which runs LOL, cPCA, PCA, and LR-CCA at the desired simulation settings. Note that this function will multi-thread natively, and took approximately 7 hours to run on a 96 core machine with $\frac{7}{8}$ of the cores active.
source('./pmlb_driver.R')
# read the results in results.means <- readRDS(paste('./data/fig5/lol_fig5_', classifier.alg, '.rds', sep="")) results.means <- results.means[complete.cases(results.means),] results.means <- results.means[results.means$alg %in% c("LOL", "PLS"), ] algs <- c("LOL", "QOQ", "ROAD", "LASSO", "PLS", "CCA", "PCA", "LDA", "RP") acols <- c("#00FF00", "#00FF00", "#AAAAAA", "#AAAAAA", "#AAAAAA", "#666666", "#666666", "#666666", "#000000") names(algs) <- acols shapes <- c(21, 24, 21, 24, 23, 21, 24, 23, 21) names(shapes) <- algs exp_names <- names(pmlb.list(task="classification")$dsets.info)
plot.results <- data.frame(r=c(), lhat=c(), exp=c(), alg=c()) for (i in 1:length(exp_names)) { for (j in 1:length(algs)) { tryCatch({ alg <- algs[j] ss <- results.means[results.means$sim == exp_names[i] & results.means$alg == algs[j],] rs <- ss$r; lhats <- ss$lhat min.result <- compute_cutoff(rs, lhats) r.min <- min.result$r; lhat.min <- min.result$lhat if (alg == 'LOL') { norm.r <- r.min norm.lhat <- lhat.min } plot.results <- rbind(plot.results, data.frame(r=r.min/norm.r, lhat=lhat.min/norm.lhat, exp=exp_names[i], alg=alg)) }, error=function(e) {NaN}, warning=function(w) {NaN}) } }
plot.results$exp <- factor(plot.results$exp) box <- data.frame(x=c(.1, 1, 1, .1), y=c(.1, .1, 1, 1)) panelb <- ggplot(plot.results, aes(x=r, y=lhat)) + geom_polygon(data=box, aes(x=x, y=y), fill='red', alpha=0.15) + geom_point(aes(x=r, y=lhat, shape=alg, fill=alg), alpha=0.5, color='black', size=2) + scale_fill_manual(values=acols, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + scale_shape_manual(values=shapes, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") + ylab("Normalized Misclassification Rate") + xlab("Normalized Embedding Dimension") + labs(shape="Simulation", color="Algorithm") + ggtitle("Real Data Performance") + scale_y_continuous(trans=log10_trans(), limits=c(.1, 10)) + scale_x_continuous(trans=log10_trans(), limits=c(.1, 10)) + theme_bw() leg_panelb <- g_legend(panelb) panelb <- panelb + theme(legend.position=NaN)
leg_overall <- g_legend(overall_leg) grid.newpage() grid.arrange(arrangeGrob(panela), arrangeGrob(panelb + xlab("") + ylab("")), leg_overall, widths=c(.8, .8, .5), ncol=3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.