# Nothing
## ---- eval=T------------------------------------------------------------------
data(ionosphere, package = 'KernelKnn')
# Count the distinct values in each column. vapply() visits the data frame
# column by column, so every column is inspected in its own type. apply()
# would first coerce the whole frame to a single matrix; if any column is
# non-numeric that matrix is character and numeric values pass through
# format(), which can merge values that differ only beyond the formatted
# precision and so understate the counts.
vapply(ionosphere, function(x) length(unique(x)), integer(1))
# the second column will be removed as it has a single unique value
ionosphere = ionosphere[, -2]
## ---- eval=T------------------------------------------------------------------
# recommended is to scale the data
# predictors: every column except the last one, centred and scaled by scale()
X = scale(ionosphere[, -ncol(ionosphere)])
# response: the last column
y = ionosphere[, ncol(ionosphere)]
## ---- eval=T------------------------------------------------------------------
# labels should be numeric and begin from 1:Inf
# each class value is replaced by its position among the sorted unique classes
y = seq_along(unique(y))[ match(ionosphere$class, sort(unique(ionosphere$class))) ]
# random split of data in train and test (75% of the observations go to train)
# NOTE: no seed is set in this chunk, so the split differs from run to run
spl_train = sample(seq_along(y), round(length(y) * 0.75))
spl_test = setdiff(seq_along(y), spl_train)
str(spl_train)
str(spl_test)
# evaluation metric
# Classification accuracy for a matrix of per-class scores.
#   y_true : vector of true labels, coded 1..K so that label j corresponds to
#            column j of 'preds'
#   preds  : matrix with one row per observation and one column per class
# Returns the proportion of observations whose highest-scoring column equals
# the true label (ties between columns are broken at random).
acc = function (y_true, preds) {
  predicted = max.col(preds, ties.method = "random")
  # Compare the labels directly instead of summing the diagonal of
  # table(y_true, predicted): that table is only square, with rows and columns
  # in matching order, when every class occurs in both vectors. If a class is
  # never predicted, diag() no longer picks the correctly classified counts.
  # na.rm = TRUE keeps the table-based behaviour of ignoring missing labels.
  mean(y_true == predicted, na.rm = TRUE)
}
## ---- eval=T, warning = FALSE, message = FALSE--------------------------------
library(KernelKnn)
# classification with k = 5, euclidean distance and no weights function;
# the model is fitted on the train rows and evaluated on the test rows
preds_TEST = KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 5 ,
                       method = 'euclidean', weights_function = NULL, regression = FALSE,
                       Levels = unique(y))
head(preds_TEST)
## ---- eval=T------------------------------------------------------------------
# same split, now with k = 10, canberra distance and the 'tricube' weights function
preds_TEST_tric = KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 10 ,
                            method = 'canberra', weights_function = 'tricube', regression = FALSE,
                            Levels = unique(y))
head(preds_TEST_tric)
## ---- eval=T------------------------------------------------------------------
# User-defined weights function: evaluates the standard normal density at
# every entry of W and rescales each row so that its weights sum to 1.
#   W : numeric matrix
# Returns a matrix with the same dimensions as W.
norm_kernel = function(W) {
  densities = dnorm(W, mean = 0, sd = 1.0)
  row_totals = rowSums(densities)
  densities / row_totals
}
# k = 10, canberra distance, with the user-defined norm_kernel function
# supplied as the weights function
preds_TEST_norm = KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ], y[spl_train], k = 10 ,
                            method = 'canberra', weights_function = norm_kernel, regression = FALSE,
                            Levels = unique(y))
head(preds_TEST_norm)
## ---- eval = T, echo = F------------------------------------------------------
knitr::kable(data.frame(k = c(10,9), method = c('canberra', 'canberra'), kernel = c('tricube', 'epanechnikov')))
## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 5-fold cross-validation for the first parameter pair of the table above
fit_cv_pair1 = KernelKnnCV(X, y, k = 10 , folds = 5, method = 'canberra',
                           weights_function = 'tricube', regression = FALSE,
                           Levels = unique(y), threads = 5, seed_num = 5)
## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)
## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 5-fold cross-validation for the second parameter pair of the table above
fit_cv_pair2 = KernelKnnCV(X, y, k = 9 , folds = 5,method = 'canberra',
                           weights_function = 'epanechnikov', regression = FALSE,
                           Levels = unique(y), threads = 5, seed_num = 5)
## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair2)
## ---- eval=T------------------------------------------------------------------
# per-fold accuracy: the predictions of fold x are scored against the labels
# found at the indices stored in folds[[x]]
acc_pair1 = vapply(seq_along(fit_cv_pair1$preds),
                   function(x) acc(y[fit_cv_pair1$folds[[x]]],
                                   fit_cv_pair1$preds[[x]]),
                   numeric(1))
acc_pair1
cat('accuracy for params_pair1 is :', mean(acc_pair1), '\n')
acc_pair2 = vapply(seq_along(fit_cv_pair2$preds),
                   function(x) acc(y[fit_cv_pair2$folds[[x]]],
                                   fit_cv_pair2$preds[[x]]),
                   numeric(1))
acc_pair2
cat('accuracy for params_pair2 is :', mean(acc_pair2), '\n')
## ---- eval = T, echo = F------------------------------------------------------
knitr::kable(data.frame(k = c(16,5), method = c('canberra', 'canberra'), kernel = c('biweight_triweight_gaussian_MULT', 'triangular_triweight_MULT')))
## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 5-fold cross-validation for the first parameter pair of the table above
# (this overwrites the earlier fit_cv_pair1)
fit_cv_pair1 = KernelKnnCV(X, y, k = 16, folds = 5, method = 'canberra',
                           weights_function = 'biweight_triweight_gaussian_MULT',
                           regression = FALSE, Levels = unique(y), threads = 5,
                           seed_num = 5)
## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)
## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 5-fold cross-validation for the second parameter pair of the table above
# (this overwrites the earlier fit_cv_pair2)
fit_cv_pair2 = KernelKnnCV(X, y, k = 5, folds = 5, method = 'canberra',
                           weights_function = 'triangular_triweight_MULT',
                           regression = FALSE, Levels = unique(y), threads = 5,
                           seed_num = 5)
## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair2)
## ---- eval=T------------------------------------------------------------------
# per-fold accuracy: the predictions of fold x are scored against the labels
# found at the indices stored in folds[[x]]
acc_pair1 = vapply(seq_along(fit_cv_pair1$preds),
                   function(x) acc(y[fit_cv_pair1$folds[[x]]],
                                   fit_cv_pair1$preds[[x]]),
                   numeric(1))
acc_pair1
cat('accuracy for params_pair1 is :', mean(acc_pair1), '\n')
acc_pair2 = vapply(seq_along(fit_cv_pair2$preds),
                   function(x) acc(y[fit_cv_pair2$folds[[x]]],
                                   fit_cv_pair2$preds[[x]]),
                   numeric(1))
acc_pair2
cat('accuracy for params_pair2 is :', mean(acc_pair2), '\n')
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.