# Chunk defaults for the rendered document: source and output are collapsed
# into a single block and every output line is prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Declare the packages this document depends on via usethis, in the same
# order as before (CodingProject3 first, then ggplot2).
invisible(lapply(c("CodingProject3", "ggplot2"), usethis::use_package))
# Attach ElemStatLearn, then load four of its bundled data sets by name and
# collect the loaded objects in `data.list`, keyed by data set name.
library(ElemStatLearn)
data.name.vec <- c("spam", "zip.train", "prostate", "ozone")
data.list <- list()
for (data.name in data.name.vec) {
  # data() creates an object named after the data set; get() then looks that
  # object up by its name so it can be stored in the list.
  utils::data(list = data.name, package = "ElemStatLearn")
  data.list[[data.name]] <- get(data.name)
}
# Build the inputs for every data set used below. Each entry of `data.list`
# is a list with two elements:
#   label.vec   - one numeric label/output per observation
#   feature.mat - the matching feature matrix (one row per observation)

# zip.train carries the digit label in its first column; keep only the rows
# labelled 0 or 1 so that the problem is binary.
is.binary <- ElemStatLearn::zip.train[, 1] %in% c(0, 1)

data.list <- list(
  spam = list(
    label.vec = ifelse(ElemStatLearn::spam$spam == "spam", 1, 0),
    feature.mat = as.matrix(ElemStatLearn::spam[, 1:57])),
  SAheart = local({
    # The outcome of SAheart is the 0/1 column `chd`; the data frame has no
    # column called `SAheart`, so indexing by that name gave an empty label
    # vector. `famhist` is a factor ("Absent"/"Present"); it is recoded to
    # 0/1 so that as.matrix() yields a numeric, not character, matrix.
    sa.heart <- ElemStatLearn::SAheart
    sa.features <- sa.heart[, names(sa.heart) != "chd", drop = FALSE]
    sa.features$famhist <- ifelse(sa.features$famhist == "Present", 1, 0)
    list(
      label.vec = as.numeric(sa.heart$chd),
      feature.mat = as.matrix(sa.features))
  }),
  zip.train = list(
    label.vec = ElemStatLearn::zip.train[is.binary, 1],
    feature.mat = ElemStatLearn::zip.train[is.binary, -1]),
  prostate = list(
    label.vec = as.numeric(ElemStatLearn::prostate$lpsa),
    # Columns 9 and 10 are dropped from the features; `lpsa` is used as the
    # output above.
    feature.mat = as.matrix(ElemStatLearn::prostate[, -c(9, 10)])),
  ozone = list(
    label.vec = ElemStatLearn::ozone$ozone,
    # The first column is the `ozone` output itself.
    feature.mat = as.matrix(ElemStatLearn::ozone[, -1]))
)
# Number of folds used when splitting each data set.
n.folds <- 4
# Result containers; all three start out as empty lists.
resultsList <- baseline <- mean.loss.list <- list()
#current.set.matrix <-
# Print the structure of every prepared data set.
str(data.list)
# Cross-validation experiment.
#
# Every data set in `data.list` is visited and the RNG is reseeded for each,
# but only the 'ozone' regression data set is fitted at the moment. For each
# of the test folds the training part is passed to
# CodingProject3::NNetEarlyStoppingCV together with an inner fold assignment.
#
# After the loop:
#   resultES                - the fit from the last fold (used by the plots)
#   resultsList[[data.name]] - list with one fit per test fold
#   baseline                - mean training label of the last fold
for (data.name in names(data.list)) {
  one.data.set <- data.list[[data.name]]
  set.seed(1)

  if (data.name == "ozone") {
    X.mat <- one.data.set$feature.mat
    y.vec <- one.data.set$label.vec
    # Randomly assign every observation to one of the n.folds test folds.
    fold.vec <- sample(rep(seq_len(n.folds), length.out = nrow(X.mat)))
    max.ite <- 50
    # Not filled in yet; presumably meant for the per-fold loss matrices.
    result.mat.list <- list()
    fold.fit.list <- list()

    # Iterate over the fold ids that actually occur, so that no fold is
    # visited when the data set is empty or has fewer rows than folds.
    for (test.fold in sort(unique(fold.vec))) {
      is.test <- fold.vec == test.fold
      is.train <- !is.test

      # Logical masks with drop = FALSE keep the splits as matrices, even
      # when a split contains a single row.
      X.train <- X.mat[is.train, , drop = FALSE]
      X.valid <- X.mat[is.test, , drop = FALSE]

      y.train <- y.vec[is.train]
      y.valid <- y.vec[is.test]

      # NOTE: this first draw is not used. It is retained only so that the
      # random number stream under set.seed(1), and therefore the fitted
      # models, stay the same; delete it if that does not matter.
      fold.vec.train <- sample(
        rep(seq_len(n.folds), length.out = nrow(X.train)))

      # For each train/test split, to show that the algorithm is actually
      # learning something non-trivial from the inputs/features, compute a
      # baseline predictor that ignores the inputs/features.
      # Regression: the mean of the training labels/outputs.
      baseline <- mean(y.train)

      # Inner fold assignment for the training rows, using the same number
      # of folds as the outer split.
      fold.v <- sample(rep(seq_len(n.folds), length.out = nrow(X.train)))
      resultES <- CodingProject3::NNetEarlyStoppingCV(
        X.train,
        y.train,
        fold.v,
        max.iterations = max.ite,
        step.size = 0.05,
        n.hidden.units = 30)

      # Keep every fold's fit instead of only the last one.
      fold.fit.list[[test.fold]] <- resultES
    }
    resultsList[[data.name]] <- fold.fit.list

    # TODO: test predictions per fold (not implemented yet):
    # pred.prob.list[test.fold] <- list(
    #   earlyStopping=resultES$predict(one.data.set$feature.mat[is.test,]),
    #   #L2regularized=resultREG$predict(one.data.set$feature.mat[is.test,]),
    #   baseline=rep(baseline, sum(is.test)))

    # TODO: for each data set, compute a 4 x 3 matrix of mean test loss
    # values:
    # each of the four rows are for a specific test set,
    # the first column is for the early stopping predictor,
    # the second column is for the L2 regularized predictor,
    # the third column is for the baseline/un-informed predictor.

    # mat <- cbind(resultES$penalty.vec, result$penalty.vec)

    # str(pred.prob.list)
    # test.fold.result.list <- list()
    # for(algo in names(pred.prob.list))
    # {
    #   pred.prob.vec <- pred.prob.list[[algo]]
    #   pred.class.vec <- ifelse(pred.prob.vec > 0.5, 1, 0)
    #   test.fold.result.list[[algo]] <-
    #     mean(data.set$labels[is.test] != pred.class.vec)
    # }
  }
}

  # For spam, SAheart and zip.train (binary classification)

#  else
#  {

    #For each train/test split, to show that your algorithm is actually learning something non-trivial from the inputs/features, compute a baseline predictor that ignores the inputs/features.
    # Binary classification: the most frequent class/label/output in the training data.

    #prdictionBase <- mfv(fold.vec)
    #baseline.append(mfv(fold.vec))

#    resultES <- LMLogisticLossEarlyStoppingCV(X.mat,
#                                            y.vec,
#                                            fold.vec,
                                            #max.iterations)
#    result <- LMLogisticLossIterations(X.mat,
#                                     y.vec,
#                                     fold.vec,
#                                     max.iterations)

#    resultList.append(resultES)
#    resultList.append(result)


#  }

  # For each data set, compute a 4 x 3 matrix of mean test loss values:
    # each of the four rows are for a specific test set,
    # the first column is for the early stopping predictor,
    # the second column is for the L2 regularized predictor,
    # the third column is for the baseline/un-informed predictor.

  #mat <- cbind(resultES$penalty.vec, result$penalty.vec)

  # The result list is ordered as: (LMLogisticLossEarlyStoppingCV, LMLogisticLossIterations) for spam, SAheart and then zip.train,
  # then (LMSquareLossEarlyStoppingCV, LMSquareLossIterations) for prostate and ozone.
# ggplot(resultsList[5]$train.loss.vec)
# ggplot(resultsList[5]$validation.loss.vec)
# ggplot(resultsList[6]$train.loss.vec)
# ggplot(resultsList[6]$validation.loss.vec)

Data set 1: spam

Matrix of loss values

#print out and/or plot the matrix of loss values
# pander::pandoc.table(resultsList[1]$train.loss.mat)
# pander::pandoc.table(resultsList[2]$train.loss.mat)

comment on difference between NN and baseline.

Train/validation loss plot

# Plot the two loss curves stored on `resultES`: first the element
# `mean.train.loss.vec`, then the element accessed as `mean.validation.loss`.
# NOTE(review): `resultES` is whatever the experiment loop earlier in this
# file assigned last, and that loop only fits the 'ozone' data set, although
# this section is headed "Data set 1: spam" - confirm this is intended.
# NOTE(review): the second name has no `.vec` suffix, unlike the first; `$` on
# a list allows partial matching, so this may rely on that - verify against
# the value returned by NNetEarlyStoppingCV.
plot(resultES$mean.train.loss.vec)
plot(resultES$mean.validation.loss)
# ggplot(resultsList[2]$train.loss.vec)
# ggplot(resultsList[2]$validation.loss.vec)

What are the optimal regularization parameters?

# selected 

Data set 2: SAheart

Matrix of loss values

#print out and/or plot the matrix.
# pander::pandoc.table(resultsList[3]$train.loss.mat)
# pander::pandoc.table(resultsList[4]$train.loss.mat)

comment on difference between NN and baseline.

Train/validation loss plot

#plot the two loss functions.
# ggplot(resultsList[3]$train.loss.vec)
# ggplot(resultsList[3]$validation.loss.vec)
# ggplot(resultsList[4]$train.loss.vec)
# ggplot(resultsList[4]$validation.loss.vec)

What are the optimal regularization parameters?

# selected

Data set 3: zip.train

Matrix of loss values

#print out and/or plot the matrix.
# pander::pandoc.table(resultsList[5]$train.loss.mat)
# pander::pandoc.table(resultsList[6]$train.loss.mat)

comment on difference between NN and baseline.

Train/validation loss plot

#plot the two loss functions.
# ggplot(resultsList[5]$train.loss.vec)
# ggplot(resultsList[5]$validation.loss.vec)
# ggplot(resultsList[6]$train.loss.vec)
# ggplot(resultsList[6]$validation.loss.vec)

What are the optimal regularization parameters?

# selected

Data set 4: prostate

Matrix of loss values

#print out and/or plot the matrix.
# pander::pandoc.table(resultsList[7]$train.loss.mat)
# pander::pandoc.table(resultsList[8]$train.loss.mat)

comment on difference between NN and baseline.

Train/validation loss plot

#plot the two loss functions.
# ggplot(resultsList[7]$train.loss.vec)
# ggplot(resultsList[7]$validation.loss.vec)
# ggplot(resultsList[8]$train.loss.vec)
# ggplot(resultsList[8]$validation.loss.vec)

What are the optimal regularization parameters?

# selected

Data set 5: ozone

Matrix of loss values

#print out and/or plot the matrix.
# pander::pandoc.table(resultsList[9]$train.loss.mat)
# pander::pandoc.table(resultsList[10]$train.loss.mat)

comment on difference between NN and baseline.

Train/validation loss plot

#plot the two loss functions.
# ggplot(resultsList[9]$train.loss.vec)
# ggplot(resultsList[9]$validation.loss.vec)
# ggplot(resultsList[10]$train.loss.vec)
# ggplot(resultsList[10]$validation.loss.vec)

What are the optimal regularization parameters?

Loss

$L(w,v) = \frac{1}{2} \sum_{i=1}^{n} \left[ w^{T} S[V^{T} x_{i}] - y_{i} \right]^2$

Gradient/Backprop

  1. $A = XV$
  2. $Z = S[A]$
  3. $b = Zw$
  4. $\delta^{w} = b - y$
  5. $\delta^{v} = Diag(\delta^{w}) \times S'[A] \times Diag(w)$
  6. $\nabla_{w}L(w,v) = (Z^{t} \delta^{w}) / n$
  7. $\nabla_{v}L(w,v) = (X^{t} \delta^{v}) / n$


mertayD/CodingProject3 documentation built on May 14, 2019, 3:07 a.m.