R/main.R


#' @name aiR
#'
#' @title aiR
#'
#' @param data Data frame containing all columns named in input.col and var.classify
#' @param input.col Character vector of column names used as numeric inputs to the aiRnet
#' @param var.classify Index or name of the column containing the classification (factor) values
#' @param aiRnet aiRnet object generated by aiRnet function
#' @param cycles Number of training cycles (iterations) to run. Default 100.
#' @param sample.method Method used to select the subset of data kept as the training set. "Sample" takes
#'  a random sample of all rows in data as the training set. "Factor" trains only on rows containing
#'  a particular factor level.
#' @param sample.size Number between 0 and 1 giving the proportion of the selected data used for training.
#' Default set to 0.5. Excluded rows are used as test examples and do not affect the aiRnet.
#' @param batch.size Number of rows per batch drawn from the training sample. Default set to "all",
#' in which case no batches are made.
#' @param sample.Factor Required when sample.method is set to "Factor". Assign a vector of length 2 of the form
#' c("column.index/name","factor.level"). A single column name or index may be given, in which case the first level is used.
#' @param na.rm Remove NAs; default set to TRUE. The function is likely to fail if NAs are present.
#' @param test.rate "none", "fast", "slow", or an integer specifying how often the model is evaluated on the test
#' set. The test set is separate from the training data only when sample.size < 0.8; otherwise the full data set
#' is used instead. "fast" tests the model at the beginning and end. "slow" tests the model every cycle. An integer
#' indicates how many cycles pass between tests, i.e. 1 is equivalent to "slow", 2 is every other cycle, 3 is every
#' third cycle, etc. "none" indicates that the model will not be evaluated on the test set.
#'
#' @return An aiR object: a list containing the trained aiRnet, the aiRnet with the lowest recorded training cost
#' (aiRbest), and a data frame of total, mean, and test cost per cycle.
#'
#' @export
aiR <- function(data,
                input.col,
                var.classify,
                aiRnet,
                cycles = 100,
                sample.method="Sample",
                sample.size=.5,
                batch.size="all",
                sample.Factor = NULL,
                na.rm=TRUE,
                test.rate = "fast") {
  if(!is.aiRnet(aiRnet)){
    stop("aiRnet must be of class \"aiRnet\"")
  }
  if(!all(input.col%in%colnames(data))){
    mesg <- paste0(paste0("\"",input.col[!input.col%in%colnames(data)],"\""), collapse = ", ")
    stop(paste("Error: the following character(s) are not found in data header:", mesg,""))
  }
  if(!all(unlist(lapply(data[,input.col], is.numeric)))){
    stop(paste("At least 1 column in input.col is not a numeric vector"))
  }
  n <- length(aiRnet)             #how many layers
  classify.vec <- data[[var.classify]]
  if(isTRUE(na.rm)) {
    data <- na.exclude(data)
  } else {
    warning("na.rm set to FALSE, NA values not sutable for matrix multiplication. Set NA to place holder value.")
  }
  # if(!is.element(test, c(TRUE,FALSE))) {
  #   stop("test must be either TRUE or FALSE")
  # }
  # if(!is.element(data.return, c(TRUE,FALSE))) {
  #   stop("data.return must be either TRUE or FALSE")
  # }
  if(!is.factor(classify.vec)) {
    stop("var.classify must be assigned a factor vector as a classification variable.")
  } else {
    class.levels <- levels(classify.vec)  #variable.. recording levels of classification vector.
  }
  #How much of data is training, what method to train against
  sample.rows <- aiRsubset(x = data,
                           sample.method = sample.method,
                           sample.size = sample.size,
                           sample.Factor = sample.Factor)
  if(batch.size=="all"){
    batch.size <- sum(sample.rows)
  }
  if(is.element(test.rate, set = c("none","fast","slow"))){
    if(test.rate=="none"){
      modularTest <- cycles+1
    } else if(test.rate=="fast"){
      modularTest <- cycles
    } else if(test.rate=="slow"){
      modularTest <- 1
    }
  } else if(is.numeric(test.rate) && test.rate%%1==0 && test.rate>0){
    modularTest <- test.rate
  } else {
    stop("test.rate must be a positive integer or one of \"none\", \"fast\", \"slow\".")
  }

  data <- data[,c(input.col)]
  data_train <- data[sample.rows,]
  classify_train.vec <- classify.vec[sample.rows]
  if(sample.size<0.8){
    data_test <- data[!sample.rows,]
    classify_test.vec <- classify.vec[!sample.rows]
  } else {
    data_test <- data
    classify_test.vec <- classify.vec
  }

  #Decide what to do with test variable and test.rate variable


  aiRobj <- aiRrun(data_train = data_train,
                   data_test = data_test,
                   classify_train.vec = classify_train.vec,
                   classify_test.vec = classify_test.vec,
                   class.levels = class.levels,
                   aiRnet = aiRnet,
                   cycles = cycles,
                   batch.size = batch.size,
                   modularTest = modularTest)


  return(aiRobj)
}
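
# Illustrative usage sketch (commented out; not run). It assumes an aiRnet
# object has already been built with the package's aiRnet() constructor; the
# object name `net` and the settings shown here are hypothetical.
# net <- aiRnet(...)   # a network shaped for 4 numeric inputs and 3 classes
# fit <- aiR(data = iris,
#            input.col = c("Sepal.Length", "Sepal.Width",
#                          "Petal.Length", "Petal.Width"),
#            var.classify = "Species",
#            aiRnet = net,
#            cycles = 200,
#            sample.size = 0.7,
#            test.rate = "slow")
# fit$Cost holds total, mean, and test cost per cycle;
# fit$aiRbest holds the network with the lowest recorded training cost.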



#' @name aiRrun
#'
#' @title aiRrun
#'
#' @description Handles the activation and back propagation of the network.
#'
#' @param data_train training dataset
#' @param data_test testing dataset
#' @param aiRnet aiRnet object to be trained
#' @param classify_train.vec vector of training classifiers
#' @param classify_test.vec vector of test classifiers
#' @param class.levels the levels of the classifier vector
#' @param cycles number of iterations done
#' @param batch.size number of samples taken at a time in a batch
#' @param modularTest How many cycles pass before the aiRnet is used on data_test
#'
#' @return An aiR object: a list containing the final aiRnet, the best aiRnet (lowest total training cost), and a data frame of cost per cycle.
aiRrun <- function(data_train,
                   data_test,
                   aiRnet,
                   classify_train.vec,
                   classify_test.vec,
                   class.levels,
                   cycles,
                   batch.size,
                   modularTest){
  #subdivide into batches
  batches <- aiRbatch(data = data_train, batch.size = batch.size)
  nb <- length(batches)
  tot.cost <- numeric(cycles)
  mean.cost <- numeric(cycles)
  test.cost <- rep(NA,cycles)
  n <- length(aiRnet)+1
  #test model
  aiRbest <- aiRnet
  min.cost <- batch.size*ncol(data_train)
  for(i in 1:cycles){

    if(i==1||i%%modularTest==0){
      testActiv <- aiRactivation(data = data_test, aiRnet = aiRnet)
      testCost <- aiRcost(data = testActiv[[n]], class.levels = class.levels, classify = classify_test.vec)
      test.cost[i] <- testCost$mean.cost
    }

    if(i%%nb==0){
      b <- batches[[nb]]
      batches <- aiRbatch(data = data_train, batch.size = batch.size)
    } else {
      b <- batches[[i%%nb]]
    }
    #find activation
    aiRactiv <- aiRactivation(data = data_train[b,], aiRnet = aiRnet)
    # find cost
    aiRcost <- aiRcost(data = aiRactiv[[n]], class.levels = class.levels, classify = classify_train.vec[b])

    tot.cost[i] <- aiRcost$total.cost
    mean.cost[i] <- aiRcost$mean.cost
    if(tot.cost[i] <= min.cost){   #keep the network with the lowest total cost seen so far
      min.cost <- tot.cost[i]
      aiRbest <- aiRnet
    }
    newaiR <-  backprop(aiRnet = aiRnet, aiRcost = aiRcost, aiRactivation = aiRactiv)
    aiRnet <- aiRfresh(aiRnet = newaiR)
    svMisc::progress(i, max.value = cycles, progress.bar = TRUE)
  }
  d <- data.frame(cycles = 1:cycles, tot.cost = tot.cost, mean.cost = mean.cost, test.cost = test.cost)
  #ggplot(data = NULL, aes(x = 1:cycles, y = tot.cost)) + geom_point()
  ret <- list(aiRnet,aiRbest, d)
  names(ret) <- c("aiRnet","aiRbest","Cost")
  class(ret) <- "aiR"
  return(ret)
}
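
# A hypothetical way to inspect training progress from the returned object
# (sketch only, commented out; assumes ggplot2 is installed and that `fit` is
# an aiR object such as one returned by aiR()):
# library(ggplot2)
# ggplot(fit$Cost, aes(x = cycles, y = mean.cost)) +
#   geom_line() +
#   geom_point(aes(y = test.cost), na.rm = TRUE, colour = "red")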




#' @name aiRactivation
#'
#' @title aiRactivation
#'
#' @description calculates and saves the activation of the network.
#'
#' @param data datasets to pass through model
#' @param aiRnet aiRnet object to be trained
#'
#' @return Returns a list of length(aiRnet)+1 whose first element is data. Subsequent elements contain
#' the activation levels of each layer's neurons for each data observation. May be helpful for data visualization.
#'
#' @export
aiRactivation <- function(aiRnet, data) {
  #
  if(!is.aiRnet(aiRnet)){
    stop("aiRnet must be of class \"aiRnet\"")
  }
  n <- length(aiRnet)

  l <- vector("list", n+1)   #element 1 holds the input data; elements 2..(n+1) hold each layer's activations
  l[[1]] <- data
  for(i in 2:(n+1)) {   #Transform data through aiRnet
    data <- as.matrix(data)%*%aiRnet[[i-1]]$weights
    data <- mat.opperation(x = data, y = aiRnet[[i-1]]$bias, opperation = "+")
    data <- sigmoid(data)
    l[[i]] <- data
  }
  l[[n+1]] <- apply(l[[n+1]],2,zero)
  names(l) <- c("data",paste0("layer",1:n))
  class(l) <- "aiRactivation"
  return(l)

}
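
# Minimal standalone sketch of the forward pass performed above: each layer
# computes sigmoid(X %*% weights + bias). It is commented out and uses plain
# matrices plus a local sigmoid definition, so it does not depend on any
# package internals; all object names here are hypothetical.
# sigmoid_demo <- function(x) 1 / (1 + exp(-x))
# X <- matrix(rnorm(6), nrow = 2, ncol = 3)            # 2 observations, 3 inputs
# W <- matrix(rnorm(12), nrow = 3, ncol = 4)           # one layer with 4 neurons
# b <- rnorm(4)                                        # one bias per neuron
# A <- sigmoid_demo(sweep(X %*% W, 2, b, FUN = "+"))   # add bias per column, then squash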


#' @name aiRcost
#'
#' @title aiRcost
#'
#' @description Calculates the resulting cost of the network for a given dataset activation.
#'
#' @param data takes in final activation level of a network.
#' @param classify Factor vector of correct classifications corresponding to data observations.
#' @param class.levels the levels of the classifier vector
#'
#' @return Returns an aiRcost object (list) with the following elements: cost, dcost, obser.cost, total.cost, mean.cost
#'    cost: cost per value,
#'    dcost: differential cost per value,
#'    obser.cost: cost per observation,
#'    total.cost: batch total cost,
#'    mean.cost: batch mean cost
#'
#' @export
aiRcost <- function(data, classify, class.levels) {
  class.mat <- diag(nrow = length(class.levels),ncol = length(class.levels))
  correct <- class.mat[classify,]
  cost <- (data-correct)^2
  dcost <- 2*(data-correct)
  dsig <- dsigmoid(data)
  if(is.null(dim(cost))){
    obs.cost <- sum(cost)
  } else {
    obs.cost <- apply(cost,1,sum)
  }
  total.cost <- sum(obs.cost)
  mean.cost <- mean(obs.cost)
  ret <- list(cost, dcost, obs.cost, total.cost, mean.cost)
  names(ret) <- c("cost","dcost","obser.cost","total.cost", "mean.cost")
  class(ret) <- "aiRcost"
  return(ret)
}
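
# Worked sketch of the cost calculation above (commented out; standalone, no
# package internals, hypothetical object names). A factor used as a row index
# into an identity matrix yields one-hot target rows; the cost is the
# element-wise squared error between activations and targets.
# lv <- c("a", "b", "c")
# cls <- factor(c("b", "a", "c"), levels = lv)
# targets <- diag(length(lv))[cls, ]     # 3 x 3 one-hot matrix, one row per observation
# preds <- matrix(runif(9), nrow = 3)    # stand-in for final-layer activations
# cost <- (preds - targets)^2
# obs.cost <- apply(cost, 1, sum)        # cost per observation
# mean(obs.cost)                         # batch mean cost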

#' @name backprop
#'
#' @title backprop
#'
#' @param aiRnet aiRnet object
#' @param aiRcost aiRcost object
#' @param aiRactivation aiRactivation object
#'
#' @return returns aiRnet with change.w and change.b values filled
backprop <- function(aiRnet, aiRcost, aiRactivation) {
  #
  n <- length(aiRactivation)
  n.obs <- nrow(aiRactivation[[n]])
  gencost <- aiRcost$dcost*dsigmoid(aiRactivation[[n]])
 # cost.vec <- cost.vec*scales::rescale(abs(cost.vec), c(1,2))
  deltaW <- t(aiRactivation[[n-1]])%*%gencost/n.obs
  deltaA <- gencost%*%t(aiRnet[[n-1]]$weights)
  deltaB <- unname(apply(gencost,2, mean))
  # start.adjust <- aiRactivation[[n-1]]*cost.vec
  aiRnet[[n-1]]$change.w <- deltaW
  aiRnet[[n-1]]$change.b <- deltaB
  if(n>2){
    for(i in (n-1):2){
      gencost <- deltaA*dsigmoid(aiRactivation[[i]])
      # start.adjust <- aiRactivation[[i-1]]*cost.vec
      deltaW <- t(aiRactivation[[i-1]])%*%gencost/n.obs
      deltaA <- gencost%*%t(aiRnet[[i-1]]$weights)
      deltaB <- apply(gencost,2, mean)
      aiRnet[[i-1]]$change.w <- deltaW #ideally we wouldn't use apply here
      aiRnet[[i-1]]$change.b <- deltaB
    }
  }
  return(aiRnet)
}
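
# The gradient formulas applied above, written out for reference (notation is
# mine, not part of the package). For the output layer L:
#   delta_L = dC/dA_L * dsigmoid(A_L)             ("gencost")
#   dW_L    = t(A_{L-1}) %*% delta_L / n.obs      ("deltaW")
#   db_L    = colMeans(delta_L)                   ("deltaB")
# The error is then pushed back one layer with
#   dA_{L-1} = delta_L %*% t(W_L)                 ("deltaA")
# which is reused as dC/dA for the next-lower layer in the loop.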

#' @name aiRfresh
#'
#' @title aiRfresh
#'
#' @description Applies the changes stored in change.w and change.b to each layer's respective
#' weights and biases, then resets the stored changes to zero.
#'
#' @param aiRnet aiRnet object
#'
#' @return aiRnet object adjusted via change.w and change.b
aiRfresh <- function(aiRnet){
  n <- length(aiRnet)
  for(i in 1:n){
    aiRnet[[i]]$weights <- aiRnet[[i]]$weights - aiRnet[[i]]$change.w
    aiRnet[[i]]$bias <- aiRnet[[i]]$bias - aiRnet[[i]]$change.b
    aiRnet[[i]]$change.w <- aiRnet[[i]]$change.w*0
    aiRnet[[i]]$change.b <- aiRnet[[i]]$change.b*0
  }
  return(aiRnet)
}
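
# Note: the update above is plain gradient descent with an implicit learning
# rate of 1 (weights <- weights - change.w). A hypothetical variant with an
# explicit rate (not part of the package) could scale the stored gradients
# before applying them, e.g.:
# aiRfresh_lr <- function(aiRnet, rate = 0.1) {
#   for (i in seq_along(aiRnet)) {
#     aiRnet[[i]]$weights  <- aiRnet[[i]]$weights - rate * aiRnet[[i]]$change.w
#     aiRnet[[i]]$bias     <- aiRnet[[i]]$bias    - rate * aiRnet[[i]]$change.b
#     aiRnet[[i]]$change.w <- aiRnet[[i]]$change.w * 0
#     aiRnet[[i]]$change.b <- aiRnet[[i]]$change.b * 0
#   }
#   aiRnet
# }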