require(lolR) require(ggplot2) require(MASS) require(abind) require(robust) require(e1071) require(reshape2) n=100 d=30 r=5 #rot=-pi/4 # rotation in radians #R = rbind(c(cos(rot), -sin(rot), 0, 0, 0), c(sin(rot), cos(rot), 0, 0, 0), # c(0,0,1,0,0), c(0,0,0,1,0), c(0,0,0,0,1)) # rotation matrix for rotation by 45 degrees in first 2 dimensions
data <- lol.sims.rev_rtrunk(n, d, b=1, maxvar=6) data.regular_trunk <- lol.sims.rtrunk(n, d) X = data$X; Y = data$Y # generate outliers Sigmas.outliers <- data.regular_trunk$Sigmas # for (i in 1:dim(data.regular_trunk$Sigmas)[3]) { # for (j in 1:dim(data.regular_trunk$Sigmas)[1]) { # for (k in 1:dim(data.regular_trunk$Sigmas)[2]) { # Sigmas.outliers[j, k, i] <- data.regular_trunk$Sigmas[(d+1)-j, (d+1)-k, i] # } # } # }
result.robcov.performance <- data.frame(i=c(), class=c(), error=c(), method=c(), iter=c()) niter <- 10 mus.outliers <- array(0, dim=c(d, 2)) for (i in 1:10) { for (j in 1:niter) { data.outlier <- lolR:::lol.sims.sim_gmm(mus=mus.outliers, Sigmas=i*Sigmas.outliers, n=.2*n, priors=data$priors) # randomly reorder Y for "noise" X.o <- data.outlier$X; Y.o <- sample(data.outlier$Y) X <- rbind(X, X.o) Y <- c(Y, Y.o) # randomly reorder X and Y reord <- sample(1:length(Y)) X <- X[reord,]; Y <- Y[reord] for (c in 1:2) { result.robcov.performance <- rbind(result.robcov.performance, data.frame(i=i, class=c, error=norm(cov(X[Y == c,,drop=FALSE]) - data$Sigmas[,,c], "F"), method="cov", iter=j)) result.robcov.performance <- rbind(result.robcov.performance, data.frame(i=i, class=c, error=norm(covRob(X[Y == c,,drop=FALSE], estim="weighted")$cov - data$Sigmas[,,c], "F"), method="robCov", iter=j)) } } } robcov.perf <- aggregate(error ~ i + method, data=result.robcov.performance, FUN=mean) ggplot(data=robcov.perf, aes(x=i, y=error, group=method, color=method)) + geom_line() + xlab("Variance Multiplier of Noise Points") + ylab("Error of True Covariance Estimation") + ggtitle("Comparison of Robust and non-robust Covariance Estimation")
data <- lol.sims.rtrunk(n, d, b=1, maxvar=6) X = data$X; Y = data$Y mus.plotdat <- melt(data$mus); colnames(mus.plotdat) <- c("Dimension", "Class", "Value") mus.plotdat$Class <- factor(mus.plotdat$Class) ggplot(mus.plotdat, aes(x=Dimension, y=Value, group=Class, color=Class)) + geom_line() + xlab("Dimension") + ylab("Mean Value") + theme_bw() + ggtitle("Good Samples Means") sigmas.plotdat <- melt(data$Sigmas); colnames(sigmas.plotdat) <- c("x", "y", "Class", "Value") sigmas.plotdat <- aggregate(Value ~ x + y, data=sigmas.plotdat, FUN=mean) ggplot(sigmas.plotdat, aes(x=x, y=y, fill='purple', alpha=Value)) + geom_tile() + scale_alpha_continuous() + xlab("Dimension 1") + ylab("Dimension 2") + theme_bw() + ggtitle("Good Samples Variance") # generate outliers data.regular_trunk <- lol.sims.rev_rtrunk(n, d) Sigmas.outliers <- data.regular_trunk$Sigmas # for (i in 1:dim(data.regular_trunk$Sigmas)[3]) { # for (j in 1:dim(data.regular_trunk$Sigmas)[1]) { # for (k in 1:dim(data.regular_trunk$Sigmas)[2]) { # Sigmas.outliers[j, k, i] <- data.regular_trunk$Sigmas[(d+1)-j, (d+1)-k, i] # } # } # } mus.outliers <- array(0, dim=c(d, 2)) data.outlier <- lolR:::lol.sims.sim_gmm(mus=mus.outliers, Sigmas=10*Sigmas.outliers, n=.4*n, priors=data$priors) sigmas.plotdat <- melt(10*Sigmas.outliers); colnames(sigmas.plotdat) <- c("x", "y", "Class", "Value") sigmas.plotdat <- aggregate(Value ~ x + y, data=sigmas.plotdat, FUN=mean) ggplot(sigmas.plotdat, aes(x=x, y=y, fill='purple', alpha=Value)) + geom_tile() + scale_alpha_continuous() + xlab("Dimension 1") + ylab("Dimension 2") + theme_bw() + ggtitle("Noise Samples Variance") # randomly reorder Y for "noise" X.o <- data.outlier$X; Y.o <- sample(data.outlier$Y) X <- rbind(X, X.o) Y <- c(Y, Y.o) # randomly reorder X and Y reord <- sample(1:length(Y)) X.train <- X[reord,]; Y.train <- Y[reord] data.test <- lol.sims.rev_rtrunk(n, d, b=1, maxvar=6) X.test <- data.test$X; Y.test <- data.test$Y
data <- data.frame(x1=X.train[,1], x2=X.train[,2], y=Y.train) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point() + xlab("x1") + ylab("x2") + ggtitle("Simulated Data")
result <- lol.project.lol(X.train, Y.train, r) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Training Data using LOL")
newXr <- lol.embed(X.test, result$A) data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Testing Data using LOL")
liney <- lda(result$Xr, as.factor(Y.train), kernel="linear") resultlda <- predict(liney, newXr)$class lhat <- 1 - sum(resultlda == Y.test)/length(Y.test) print(lhat)
result <- lol.project.lol(X.train, Y.train, r, robust=TRUE) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Data using Robust LOL")
newXr <- lol.embed(X.test, result$A) data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Testing Data using Robust LOL")
liney <- lda(result$Xr, as.factor(Y.train)) resultlda <- predict(liney, newXr)$class lhat <- 1 - sum(resultlda == Y.test)/length(Y.test) print(lhat)
result <- lol.project.pls(X.train, Y.train, r) data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Data using PLS")
newXr <- lol.embed(X.test, result$A) data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test) data$y <- factor(data$y) ggplot(data, aes(x=x1, y=x2, color=y)) + geom_point(alpha=0.6) + xlab("x1") + ylab("x2") + ggtitle("Projected Testing Data using PLS")
liney <- lda(result$Xr, as.factor(Y.train)) resultlda <- predict(liney, newXr)$class lhat <- 1 - sum(resultlda == Y.test)/length(Y.test) print(lhat)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.