# inst/doc/binary_classification_using_the_ionosphere_data.R

## ---- eval=T------------------------------------------------------------------

data(ionosphere, package = 'KernelKnn')

# number of distinct values in each column; the columns are visited one by one
# so that every column is counted in its own type (apply() would first coerce
# the whole data frame to a matrix of a single type)
vapply(ionosphere, function(x) length(unique(x)), integer(1))

# the second column will be removed as it has a single unique value

ionosphere <- ionosphere[, -2]

## ---- eval=T------------------------------------------------------------------
# scaling the data is recommended: every column except the last one is
# standardised and used as predictors, the last column is kept as the response

X <- scale(ionosphere[, -ncol(ionosphere)])
y <- ionosphere[[ncol(ionosphere)]]

## ---- eval=T------------------------------------------------------------------

# labels have to be numeric and start from 1: every class is replaced by its
# position among the sorted unique classes

y <- seq_along(unique(y))[
  match(ionosphere$class, sort(unique(ionosphere$class)))
]

# random train / test split: 75% of the row indices are drawn for training,
# the remaining indices form the test set

spl_train <- sample(seq_along(y), round(length(y) * 0.75))
spl_test <- setdiff(seq_along(y), spl_train)
str(spl_train)
str(spl_test)


# evaluation metric

# Classification accuracy.
#
# y_true : vector of true labels, coded as the column index (1, 2, ...) of the
#          matching class in `preds`
# preds  : matrix (or data frame) with one row per observation and one column
#          per class; the predicted class of a row is the column holding the
#          row maximum (ties are broken at random)
#
# Returns the fraction of observations whose predicted class equals the true
# label. Pairs containing a missing value are left out of the computation.
acc <- function(y_true, preds) {

  predicted <- max.col(preds, ties.method = "random")

  stopifnot(length(predicted) == length(y_true))

  # compare element-wise rather than through diag(table(...)): a contingency
  # table is not square when some class is never predicted (or never observed),
  # and its diagonal then no longer lines up with the correct predictions
  mean(predicted == y_true, na.rm = TRUE)
}


## ---- eval=T, warning = FALSE, message = FALSE--------------------------------

library(KernelKnn)

# k = 5 neighbours, euclidean distance, no weights function:
# the training rows are used to predict the held-out test rows
preds_TEST <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 5,
  method = 'euclidean',
  weights_function = NULL,
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST)


## ---- eval=T------------------------------------------------------------------


# k = 10 neighbours, canberra distance, built-in 'tricube' weights function
preds_TEST_tric <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 10,
  method = 'canberra',
  weights_function = 'tricube',
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_tric)


## ---- eval=T------------------------------------------------------------------


# User-defined weights function: evaluates the standard normal density at every
# entry of W and rescales each row so that its weights sum to one.
#
# W : numeric matrix
#
# Returns a matrix with the same dimensions as W.
norm_kernel <- function(W) {

  density_weights <- dnorm(W, mean = 0, sd = 1.0)

  density_weights / rowSums(density_weights)
}


# same setting as the tricube run (k = 10, canberra distance), but the weights
# come from the user-defined norm_kernel function instead of a built-in kernel
preds_TEST_norm <- KernelKnn(
  X[spl_train, ],
  TEST_data = X[spl_test, ],
  y[spl_train],
  k = 10,
  method = 'canberra',
  weights_function = norm_kernel,
  regression = FALSE,
  Levels = unique(y)
)
head(preds_TEST_norm)


## ---- eval = T, echo = F------------------------------------------------------

 # table of the two parameter pairs (k, distance method, kernel)
 knitr::kable(
   data.frame(
     k = c(10, 9),
     method = rep('canberra', 2),
     kernel = c('tricube', 'epanechnikov')
   )
 )

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------

# 5-fold cross-validation for the first pair: k = 10 with the 'tricube' kernel
fit_cv_pair1 <- KernelKnnCV(
  X, y,
  k = 10,
  folds = 5,
  method = 'canberra',
  weights_function = 'tricube',
  regression = FALSE,
  Levels = unique(y),
  threads = 5,
  seed_num = 5
)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 5-fold cross-validation for the second pair: k = 9 with the 'epanechnikov'
# kernel
fit_cv_pair2 <- KernelKnnCV(
  X, y,
  k = 9,
  folds = 5,
  method = 'canberra',
  weights_function = 'epanechnikov',
  regression = FALSE,
  Levels = unique(y),
  threads = 5,
  seed_num = 5
)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair2)


## ---- eval=T------------------------------------------------------------------
# accuracy per fold for the first pair: the predictions of fold i are compared
# with the labels selected by the indices of fold i; seq_along() keeps the
# iteration empty when there are no predictions, and vapply() guarantees one
# numeric value per fold
acc_pair1 <- vapply(
  seq_along(fit_cv_pair1$preds),
  function(i) acc(y[fit_cv_pair1$folds[[i]]], fit_cv_pair1$preds[[i]]),
  numeric(1)
)
acc_pair1

cat('accuracy for params_pair1 is :', mean(acc_pair1), '\n')

# accuracy per fold for the second pair, computed the same way
acc_pair2 <- vapply(
  seq_along(fit_cv_pair2$preds),
  function(i) acc(y[fit_cv_pair2$folds[[i]]], fit_cv_pair2$preds[[i]]),
  numeric(1)
)
acc_pair2

cat('accuracy for params_pair2 is :', mean(acc_pair2), '\n')


## ---- eval = T, echo = F------------------------------------------------------

 # table of the two parameter pairs that use combined kernels
 knitr::kable(
   data.frame(
     k = c(16, 5),
     method = rep('canberra', 2),
     kernel = c('biweight_triweight_gaussian_MULT', 'triangular_triweight_MULT')
   )
 )

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------

# 5-fold cross-validation for the first pair: k = 16 with the
# 'biweight_triweight_gaussian_MULT' kernel (overwrites the earlier fit_cv_pair1)
fit_cv_pair1 <- KernelKnnCV(
  X, y,
  k = 16,
  folds = 5,
  method = 'canberra',
  weights_function = 'biweight_triweight_gaussian_MULT',
  regression = FALSE,
  Levels = unique(y),
  threads = 5,
  seed_num = 5
)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------

# 5-fold cross-validation for the second pair: k = 5 with the
# 'triangular_triweight_MULT' kernel (overwrites the earlier fit_cv_pair2)
fit_cv_pair2 <- KernelKnnCV(
  X, y,
  k = 5,
  folds = 5,
  method = 'canberra',
  weights_function = 'triangular_triweight_MULT',
  regression = FALSE,
  Levels = unique(y),
  threads = 5,
  seed_num = 5
)


## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair2)


## ---- eval=T------------------------------------------------------------------
# accuracy per fold for the first pair: the predictions of fold i are compared
# with the labels selected by the indices of fold i; seq_along() keeps the
# iteration empty when there are no predictions, and vapply() guarantees one
# numeric value per fold
acc_pair1 <- vapply(
  seq_along(fit_cv_pair1$preds),
  function(i) acc(y[fit_cv_pair1$folds[[i]]], fit_cv_pair1$preds[[i]]),
  numeric(1)
)
acc_pair1

cat('accuracy for params_pair1 is :', mean(acc_pair1), '\n')

# accuracy per fold for the second pair, computed the same way
acc_pair2 <- vapply(
  seq_along(fit_cv_pair2$preds),
  function(i) acc(y[fit_cv_pair2$folds[[i]]], fit_cv_pair2$preds[[i]]),
  numeric(1)
)
acc_pair2

cat('accuracy for params_pair2 is :', mean(acc_pair2), '\n')

# Try the KernelKnn package in your browser

# Any scripts or data that you put into this service are public.

# KernelKnn documentation built on Jan. 7, 2023, 1:18 a.m.