R/Wrappers.R
In biClassify: Binary Classification Using Extensions of Discriminant Analysis

Documented in KOS

KOS_Classify <- function(TestData, TrainData, TrainCat, Sigma = NULL, Gamma = NULL , Lambda = NULL){
  if(ncol(TestData) != ncol(TrainData)) stop("Number of features in X and TrainData are different.")
  if( is.null(Sigma) || is.null(Gamma) || is.null(Lambda)){
    output <- SelectParams(TrainData = TrainData, 
                           TrainCat = TrainCat,
                           Sigma = Sigma,
                           Gamma = Gamma)
    Sigma <- output$Sigma
    Gamma <- output$Gamma
    Lambda <- output$Lambda
  }
  
  Y <- IndicatMat(TrainCat)$Categorical
  Theta <- OptScores(TrainCat)
  YTheta <- Y %*% Theta
  
  output <- SparseKernOptScore(TrainData = TrainData, 
                               TrainCat = TrainCat, 
                               w0 = rep(1, ncol(TrainData)), 
                               Lambda = Lambda,
                               Gamma = Gamma, 
                               Sigma = Sigma, 
                               Maxniter = 100,
                               Epsilon = 1e-05, 
                               Error = 1e-05)
  
  w <- output$Weights
  Dvec <- output$Dvec
  
  Kw <- KwMat(TrainData , w, Sigma)
  
  # Create projection Values
  TrainProjections <- GetProjectionsCPP(TestData = TrainData, 
                                    TrainData = TrainData, 
                                    TrainCat = TrainCat, 
                                    Dvec = Dvec, 
                                    w = w, 
                                    Kw = Kw, 
                                    Sigma = Sigma, 
                                    Gamma = Gamma)
  
  ### Need test projection values for LDA
  NewProjections <- GetProjectionsCPP(TestData = TestData, 
                                  TrainData = TrainData, 
                                  TrainCat = TrainCat, 
                                  Dvec = Dvec,  
                                  w = w, 
                                  Kw = Kw, 
                                  Sigma = Sigma , 
                                  Gamma = Gamma )
  
  NewProjections <- as.data.frame(NewProjections)
  colnames(NewProjections) <- c("Projections")
  
  ### All of this is used to create discirminant line
  Training <- data.frame(TrainCat, TrainProjections)
  colnames(Training) <- c("Category", "Projections")
  
  ## fit LDA on training projections
  LDAfit <- MASS::lda(Category ~ Projections, data = Training)
  
  # Predict class membership using LDA
  Predictions <- stats::predict(object = LDAfit, newdata = NewProjections)$class
  
  return(list(Predictions = Predictions, 
              Weights = w, 
              Dvec = Dvec))
}

is.not.null <- function(x) !is.null(x)


#' @title Function which generates feature weights, discriminant vector, and class predictions.
#' @param TestData (m x p) Matrix of unlabelled data with numeric features to be classified. Cannot have missing values.
#' @param TrainData (n x p) Matrix of training data with numeric features. Cannot have missing values.
#' @param TrainCat (n x 1) Vector of class membership corresponding to Data. Values must be either 1 or 2.
#' @param Method A string of characters which determines which version of KOS to use. Must be either "Full" or "Subsampled". Default is "Full".
#' @param Mode A string of characters which determines how the reduced sample paramters will be inputted for each method. Must be either "Research", "Interactive", or "Automatic". Default is "Automatic".
#' @param m1 The number of class 1 compressed samples to be generated. Must be a positive integer.
#' @param m2 The number of class 2 compressed samples to be generated. Must be a positive integer.
#' @param Sigma Scalar Gaussian kernel parameter. Default set to NULL and is automatically generated if user-specified value not provided. Must be > 0. User-specified parameters must satisfy hierarchical ordering.
#' @param Gamma Scalar ridge parameter used in kernel optimal scoring. Default set to NULL and is automatically generated if user-specified value not provided. Must be > 0. User-specified parameters must satisfy hierarchical ordering.
#' @param Lambda Scalar sparsity parameter on weight vector. Default set to NULL and is automatically generated by the function if user-specified value not provided. Must be >= 0. When Lambda = 0, SparseKOS defaults to kernel optimal scoring of [Lapanowski and Gaynanova, preprint] without sparse feature selection. User-specified parameters must satisfy hierarchical ordering.
#' @param Epsilon Numerical stability constant with default value 1e-05. Must be > 0 and is typically chosen to be small.
#' @references Lapanowski, Alexander F., and Gaynanova, Irina. ``Sparse feature selection in kernel discriminant analysis via optimal scoring'', Artificial Intelligence and Statistics, 2019.
#' @description Returns a (m x 1) vector of predicted group membership (either 1 or 2) for each data point in X. Uses Data and Cat to train the classifier.
#' @details Function which handles classification. Generates feature weight vector and discriminant coefficients vector in sparse kernel optimal scoring. If a matrix X is provided, the function classifies each data point using the generated feature weight vector and discriminant vector. Will use user-supplied parameters Sigma, Gamma, and Lambda if any are given. If any are missing, the function will run SelectParams to generate the other parameters. User-specified values must satisfy hierarchical ordering.
#' @examples 
#' \donttest{
#' Sigma <- 1.325386  #Set parameter values equal to result of SelectParam.
#' Gamma <- 0.07531579 #Speeds up example.
#' Lambda <- 0.002855275
#' 
#' TrainData <- KOS_Data$TrainData
#' TrainCat <- KOS_Data$TrainCat
#' TestData <- KOS_Data$TestData
#' TestCat <- KOS_Data$TestCat
#' 
#' KOS(TestData = TestData, 
#'     TrainData = TrainData, 
#'     TrainCat = TrainCat , 
#'     Sigma = Sigma , 
#'     Gamma = Gamma , 
#'     Lambda = Lambda)
#'}
#' @return  A list of
#' \item{Predictions}{ (m x 1) Vector of predicted class labels for the data points in TestData. Only included in non-null value of X is provided.} 
#' \item{Weights}{ (p x 1) Vector of feature weights.} 
#' \item{Dvec}{(n x 1) Discrimiant coefficients vector.}
#' @export
KOS <- function(TestData = NULL, TrainData, TrainCat, Method = "Full", Mode = "Automatic", m1 = NULL, m2 = NULL, Sigma = NULL, Gamma = NULL, Lambda = NULL, Epsilon = 1e-05){
  # --- Check Data ---
  CheckData(TrainData = TrainData, TrainCat = TrainCat, TestData = TestData)
  
  # --- Set Automatic Reduced Sample Amounts ---
  if(Mode == "Automatic"){
    # --- Initialize Reduced Data Amounts and Sparsity Level ---
    m1 <- sum(TrainCat == 1)/10
    m2 <- sum(TrainCat == 2)/10
  }
  
  # --- Train Full KOS ---
  if(Method == "Full"){
    output <- SelectParams(TrainData = TrainData, 
                           TrainCat = TrainCat,
                           Sigma = Sigma, 
                           Gamma = Gamma, 
                           Epsilon = Epsilon)
    Sigma <- output$Sigma
    Gamma <- output$Gamma
    Lambda <- output$Lambda
    
 
    Predict <- KOS_Classify(TestData = TestData, 
                            TrainData = TrainData, 
                            TrainCat = TrainCat,
                            Sigma = Sigma, 
                            Gamma = Gamma, 
                            Lambda = Lambda)
    return(Predict)
  }
  
  # --- Run Subsampled KOS ---
  else if(Method == "Subsampled"){
    if(Mode == "Interactive"){
      m1 <- readline(prompt="Please enter the number of group 1 sub-samples: ")
      m2 <- readline(prompt="Please enter the number of group 2 sub-samples: ")
      
      m1 <- as.integer(m1)
      m2 <- as.integer(m2)
    }
    
    #--- Subsample Classes ---
    output <- subsampleClasses(TrainData = TrainData,
                               TrainCat = TrainCat,
                               m1 = m1,
                               m2 = m2)
    SubData <- output$Data
    SubCat <- output$Cat
    
    #--- Get Parameters ---
    output <- SelectParams(TrainData = SubData, 
                           TrainCat = SubCat , 
                           Sigma = Sigma, 
                           Gamma = Gamma, 
                           Epsilon = Epsilon)
    Sigma <- output$Sigma
    Gamma <- output$Gamma
    Lambda <- output$Lambda
    
    # --- Generate Predictions, Weights, and Dvec ---
    SubPredict <- KOS_Classify(TestData = TestData, 
                            TrainData = SubData, 
                            TrainCat = SubCat, 
                            Sigma = Sigma, 
                            Gamma = Gamma, 
                            Lambda = Lambda)
    return(SubPredict)
  }
  
  # --- Else Return Error ---
  else{
    stop("Error: Method not given a proper value.")
  }
}


CheckData <- function(TrainData, TrainCat, TestData){
  # - Check for NA values in TrainData and TrainCat
  if(any(is.na(TrainData))){
    stop("TrainData has NA values.")
  }
  
  if(any(is.na(TrainCat))){
    stop("TrainCat has NA values.")
  }
  
  if(!is.null(TestData)){
    if(ncol(TestData) != ncol(TrainData)){
      stop("TrainData and TestData have different number of features.")
    }
  }
}