require(lolR)
require(ggplot2)
require(MASS)
require(abind)
require(robust)
require(e1071)
require(reshape2)
n <- 100  # number of samples per simulation
d <- 30   # native (observed) dimensionality
r <- 5    # number of embedding dimensions
#rot=-pi/4  # rotation in radians
#R = rbind(c(cos(rot), -sin(rot), 0, 0, 0), c(sin(rot), cos(rot), 0, 0, 0),
#          c(0,0,1,0,0), c(0,0,0,1,0), c(0,0,0,0,1))  # rotation matrix for rotation by 45 degrees in first 2 dimensions

Generate Data

First, we simulate clean data from a trunk model, together with a second trunk simulation whose covariance structure will later be used to generate noise (outlier) points.

data <- lol.sims.rev_rtrunk(n, d, b=1, maxvar=6)
data.regular_trunk <- lol.sims.rtrunk(n, d)
X = data$X; Y = data$Y

# generate outliers
Sigmas.outliers <- data.regular_trunk$Sigmas
# for (i in 1:dim(data.regular_trunk$Sigmas)[3]) {
#   for (j in 1:dim(data.regular_trunk$Sigmas)[1]) {
#     for (k in 1:dim(data.regular_trunk$Sigmas)[2]) {
#       Sigmas.outliers[j, k, i] <- data.regular_trunk$Sigmas[(d+1)-j, (d+1)-k, i]
#     }
#   }
# }

Average Per-Class Error as a Function of the Magnitude of the Noise

We contaminate the clean data with zero-mean noise points whose covariance is scaled by an increasing multiplier, and compare the Frobenius-norm error of the standard sample covariance (cov) and a robust estimate (covRob) against the true class covariances.

result.robcov.performance <- data.frame(i=c(), class=c(), error=c(), method=c(), iter=c())
niter <- 10
mus.outliers <- array(0, dim=c(d, 2))
for (i in 1:10) {  # i scales the covariance of the noise points
  for (j in 1:niter) {
    data.outlier <- lolR:::lol.sims.sim_gmm(mus=mus.outliers, Sigmas=i*Sigmas.outliers, n=.2*n, priors=data$priors)
    # randomly reorder Y for "noise"
    X.o <- data.outlier$X; Y.o <- sample(data.outlier$Y)

    # combine the clean data with the current noise points without overwriting the clean data,
    # so each (i, j) iteration uses the same contamination fraction
    X.aug <- rbind(X, X.o)
    Y.aug <- c(Y, Y.o)
    # randomly reorder X and Y
    reord <- sample(1:length(Y.aug))
    X.aug <- X.aug[reord,]; Y.aug <- Y.aug[reord]
    for (c in 1:2) {
      result.robcov.performance <- rbind(result.robcov.performance, data.frame(i=i, class=c,
                                                                               error=norm(cov(X.aug[Y.aug == c,,drop=FALSE]) - data$Sigmas[,,c], "F"),
                                                                               method="cov", iter=j))
      result.robcov.performance <- rbind(result.robcov.performance, data.frame(i=i, class=c,
                                                                               error=norm(covRob(X.aug[Y.aug == c,,drop=FALSE], estim="weighted")$cov - data$Sigmas[,,c], "F"),
                                                                               method="robCov", iter=j))
    }

  }
}

robcov.perf <- aggregate(error ~ i + method, data=result.robcov.performance, FUN=mean)
ggplot(data=robcov.perf, aes(x=i, y=error, group=method, color=method)) +
  geom_line() +
  xlab("Variance Multiplier of Noise Points") +
  ylab("Error of True Covariance Estimation") +
  ggtitle("Comparison of Robust and non-robust Covariance Estimation")
data <- lol.sims.rtrunk(n, d, b=1, maxvar=6)
X = data$X; Y = data$Y

mus.plotdat <- melt(data$mus); colnames(mus.plotdat) <- c("Dimension", "Class", "Value")
mus.plotdat$Class <- factor(mus.plotdat$Class)
ggplot(mus.plotdat, aes(x=Dimension, y=Value, group=Class, color=Class)) +
  geom_line() +
  xlab("Dimension") +
  ylab("Mean Value") +
  theme_bw() +
  ggtitle("Good Samples Means")

sigmas.plotdat <- melt(data$Sigmas); colnames(sigmas.plotdat) <- c("x", "y", "Class", "Value")
sigmas.plotdat <- aggregate(Value ~ x + y, data=sigmas.plotdat, FUN=mean)
ggplot(sigmas.plotdat, aes(x=x, y=y, alpha=Value)) +
  geom_tile(fill='purple') +  # fill is a fixed colour, so it belongs outside aes()
  scale_alpha_continuous() +
  xlab("Dimension") +
  ylab("Dimension") +
  theme_bw() +
  ggtitle("Good Samples Variance (class-averaged covariance)")


# generate outliers
data.regular_trunk <- lol.sims.rev_rtrunk(n, d)
Sigmas.outliers <- data.regular_trunk$Sigmas
# for (i in 1:dim(data.regular_trunk$Sigmas)[3]) {
#   for (j in 1:dim(data.regular_trunk$Sigmas)[1]) {
#     for (k in 1:dim(data.regular_trunk$Sigmas)[2]) {
#       Sigmas.outliers[j, k, i] <- data.regular_trunk$Sigmas[(d+1)-j, (d+1)-k, i]
#     }
#   }
# }
mus.outliers <- array(0, dim=c(d, 2))
data.outlier <- lolR:::lol.sims.sim_gmm(mus=mus.outliers,  Sigmas=10*Sigmas.outliers, n=.4*n, priors=data$priors)


sigmas.plotdat <- melt(10*Sigmas.outliers); colnames(sigmas.plotdat) <- c("x", "y", "Class", "Value")
sigmas.plotdat <- aggregate(Value ~ x + y, data=sigmas.plotdat, FUN=mean)
ggplot(sigmas.plotdat, aes(x=x, y=y, alpha=Value)) +
  geom_tile(fill='purple') +  # fill is a fixed colour, so it belongs outside aes()
  scale_alpha_continuous() +
  xlab("Dimension") +
  ylab("Dimension") +
  theme_bw() +
  ggtitle("Noise Samples Variance (class-averaged covariance)")

# randomly reorder Y for "noise"
X.o <- data.outlier$X; Y.o <- sample(data.outlier$Y)

X <- rbind(X, X.o)
Y <- c(Y, Y.o)
# randomly reorder X and Y
reord <- sample(1:length(Y))
X.train <- X[reord,]; Y.train <- Y[reord]


data.test <- lol.sims.rtrunk(n, d, b=1, maxvar=6)  # held-out test data from the same clean model as the training data
X.test <- data.test$X; Y.test <- data.test$Y
data <- data.frame(x1=X.train[,1], x2=X.train[,2], y=Y.train)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Simulated Data")

Regular LOL

Project the contaminated training data with standard LOL, embed the held-out test data with the learned projection, and classify in the embedded space with LDA.

result <- lol.project.lol(X.train, Y.train, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Training Data using LOL")
newXr <- lol.embed(X.test, result$A)

data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Testing Data using LOL")
liney <- lda(result$Xr, as.factor(Y.train))
resultlda <- predict(liney, newXr)$class
lhat <- 1 - sum(resultlda == Y.test)/length(Y.test)
print(lhat)
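
For context, a chance-level baseline (always predicting the most frequent held-out class) can be computed the same way; a minimal sketch:

# chance baseline: error of always predicting the most common class in the held-out labels
lhat.chance <- 1 - max(table(Y.test))/length(Y.test)
print(lhat.chance)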

Robust LOL

Repeat the same procedure with the robust option of LOL enabled (robust=TRUE).

result <- lol.project.lol(X.train, Y.train, r, robust=TRUE)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Data using Robust LOL")
newXr <- lol.embed(X.test, result$A)

data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Testing Data using Robust LOL")
liney <- lda(result$Xr, as.factor(Y.train))
resultlda <- predict(liney, newXr)$class
lhat <- 1 - sum(resultlda == Y.test)/length(Y.test)
print(lhat)
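
Since e1071 is loaded above but not otherwise used, the same robust embedding can also be scored with a linear SVM in place of LDA; a minimal sketch:

# fit a linear SVM on the robust LOL embedding and score it on the embedded test data
svm.fit <- svm(result$Xr, as.factor(Y.train), kernel="linear")
svm.pred <- predict(svm.fit, newXr)
lhat.svm <- 1 - sum(svm.pred == Y.test)/length(Y.test)
print(lhat.svm)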

PLS

Finally, repeat the procedure using partial least squares (PLS) as the embedding.

result <- lol.project.pls(X.train, Y.train, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y.train)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Data using PLS")
newXr <- lol.embed(X.test, result$A)

data <- data.frame(x1=newXr[,1], x2=newXr[,2], y=Y.test)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point(alpha=0.6) +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Testing Data using PLS")
liney <- lda(result$Xr, as.factor(Y.train))
resultlda <- predict(liney, newXr)$class
lhat <- 1 - sum(resultlda == Y.test)/length(Y.test)
print(lhat)
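
To compare the three embeddings side by side, the project/embed/classify/score steps can be wrapped in a small helper; a minimal sketch (eval.embedding is a hypothetical convenience function, not part of lolR):

# project, embed the held-out data, classify with LDA, and return the misclassification rate
eval.embedding <- function(proj) {
  fit <- lda(proj$Xr, as.factor(Y.train))
  preds <- predict(fit, lol.embed(X.test, proj$A))$class
  1 - sum(preds == Y.test)/length(Y.test)
}
errors <- sapply(list(LOL=lol.project.lol(X.train, Y.train, r),
                      RobustLOL=lol.project.lol(X.train, Y.train, r, robust=TRUE),
                      PLS=lol.project.pls(X.train, Y.train, r)),
                 eval.embedding)
print(errors)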

