#' Supervised Machine Learning K Nearest Neighbours
#' @description This is function is based on a popular K-Nearest Neighbour Algorith, and fits a k nearest neighbour model using fast search algorithms. KNN falls in the category of supervised machine learning
#' algorithm which can be used for classification and regression tasks.
#' @format \code{\link{R6Class}} object.
#' @section Usage:
#' For details of the Usage refer to the \bold{Method,Examples & Arguments} sections of the Documentation.
#' \preformatted{
#' best_model = KNN$new(k=1, proba=FALSE, algorithm=NULL, type="class")
#' best_model$fit(X_train, X_test, "target")
#' best_model$predict(type)
#' }
#' @section Methods:
#' \describe{
#' \item{\code{$new()}}{Initialisization: This will initialize a new instance of the trainer}
#' \item{\code{$fit()}}{Fitting Model : By calling this method it trains the knn model and stores the predictions}
#' \item{\code{$predict()}}{Predictions : It returns the predictions previously stored when calling the fit Method}
#' }
#' @section Arguments:
#' \describe{
#' \item{k}{The Number of Nearest Neighbours to be considered while making a prediction}
#' \item{proba}{It specified if the probability should be computed or not by default the labelled value will be returned, default=FALSE}
#' \item{algorithm}{It specifies the algorithm which is used to fit the model, Options are 'kd_tree','cover_tree','brute'}
#' \item{type}{It specifies the type of problem to be solved , for example a classification or regression problem , values to be specified are 'regr' or 'class'}
#' }
#' @export
#' @examples
#' data("iris")
#'
#' iris$Species <- as.integer(as.factor(iris$Species))
#'
#' x_train <- iris[1:110,]
#' x_test <- iris[111:150,]
#'
#' best_model <- KNN$new(k=3, proba=TRUE, type="class")
#' best_model$fit(x_train, x_test, 'Species')
#' preds <- best_model$predict(type="raw")
KNN <- R6Class("KNN", public = list(
#' @field k the number of neighbours to consider while predicting ::: 1 by default but must be specified
k = 1,
#' @field proba must be set to True if probability needs to be computed ::: by default the value is set to FALSE
proba = FALSE,
#' @field algorithm used to train the model, the options are 'kd_tree','cover_tree','brute' ::: by default no algorithm is specified
algorithm = NULL,
#' @field type of problem to to be solved , the regression or classification type must be specified, options include 'regr' and 'class' ::: by default the value is set to class
type = "class",
#' @field model
model = NA,
#' @details
#' Create an instance of the new `KNN` object from the class.
#'
#' @param k k specidies the number of neighbours to be considering while making a prediction.
#' @param proba proba specifies if probability ranging between 0-1 shall be computed,by default value is set to FALSE.
#' @param algorithm algorithm specifies the algorithm or method used to fit the model, options include 'kd_tree','cover_tree', and 'brute'.
#' @param type specifies the type of problem under study that needs to be solved for example a regression or a classification problem, options are 'regr' or 'class'.
#' @return A `KNN` object that can be used to call fit and predict methods.
#'
#' @examples
#' data("iris")
#'
#' iris$Species <- as.integer(as.factor(iris$Species))
#'
#' x_train <- iris[1:110,]
#' x_test <- iris[111:150,]
#'
#' best_model <- KNN$new(k=3, proba=TRUE, type="class")
#' best_model$fit(x_train, x_test, 'Species')
#' preds <- best_model$predict(type="raw")
initialize = function(k, proba, algorithm, type){
if(!(missing(k))) self$k <- k
if(!(missing(proba))) self$proba <- proba
if(!(missing(algorithm))) self$algorithm <- algorithm
if(!(missing(type))) self$type <- type
superml::check_package("FNN")
},
#' @details
#' Trains the the K Nearest Neighbour KNN model
#'
#' @param train_data must be R data frame or matrix
#' @param test_data must be a R data frame or matrix
#' @param y Can be a character, name or value of target variable
#' @return NULL
#'
#' @examples
#' data("iris")
#'
#' iris$Species <- as.integer(as.factor(iris$Species))
#'
#' x_train <- iris[1:110,]
#' x_test <- iris[111:150,]
#'
#' best_model <- KNN$new(k=3, proba=TRUE, type="class")
#' best_model$fit(x_train, x_test, 'Species')
fit = function(train_data, test_data, y){
data <- private$prepare_data(train_data, test_data, y)
if(self$type == "class"){
self$model <- FNN::knn(train = data$train_data
,test = data$test_data
,cl = data$y
,k = self$k
,prob = self$proba
,algorithm = self$algorithm)
} else if (self$type == "regr"){
self$model <- FNN::knn.reg(train = data$train_data
,test = data$test_data
,y = data$y
,k = self$k
,algorithm = self$algorithm)
}
},
#' @details
#' Makes a Prediction based on the Nearest Neigbours for data
#'
#' @param type A Character, 'raw' for the exact labels else 'proba'
#' @return A list of predictions based on the neighbours
#'
#' @examples
#' data("iris")
#'
#' iris$Species <- as.integer(as.factor(iris$Species))
#'
#' x_train <- iris[1:110,]
#' x_test <- iris[111:150,]
#'
#' best_model <- KNN$new(k=3, proba=TRUE, type="class")
#' best_model$fit(x_train, x_test, 'Species')
#' preds <- best_model$predict(type="raw")
predict = function(type="raw"){
if (self$type == "class") {
if (type == "raw") {
return(as.numeric(as.character(self$model)))
} else if (type == "proba") {
return(attr(self$model, "proba"))
}
} else if (self$type == "regr") {
return(self$model$pred)
}
}),
private = list(
prepare_data = function(train_data, test_data, y){
train_data <- as.data.table(train_data)
test_data <- as.data.table(test_data)
if (!(y %in% names(train_data)))
stop(sprintf("%s Not Available in the Given Training Data", y))
# get dependent variable and store temporarily
y_temp <- train_data[[y]]
# select all independent features
train_data <- train_data[,setdiff(names(train_data), y), with = F]
# subset from test, just in case if the dependet variable is in test
test_data <- test_data[, setdiff(names(test_data), y), with = F]
# set dependent variable to y
y <- y_temp
if (ncol(test_data) != ncol(train_data))
stop(sprintf('Train and Test data must not have
unequal number of Independent Variables.'))
if (any(vapply(train_data, is.factor, logical(1)))
| any(vapply(train_data, is.character, logical(1))))
stop("Train Data must not Have Non-Numeric Variables.
Please Preprocess the data to convert all Variables into integer ")
if (any(vapply(test_data, is.factor, logical(1)))
| any(vapply(test_data, is.character, logical(1))))
stop("Test Data must not Have Non-Numeric Variables.
Please Preprocess the data to convert all Variables into integer")
# checking if target variable contains float values or NA values
if (any(is.na(y)))
stop("The Target Variable to be Predicted Contains NA Values.")
if (self$type=="class") {
if (is.numeric(y)){
if (!(all(y == floor(y))))
stop("The Target Variable to be Predicted Contains Float values")
}
}
return(list(train_data = train_data, test_data = test_data, y = y))
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.