inst/doc/regression_using_the_housing_data.R

## ---- eval=T------------------------------------------------------------------

# Load the Boston data set that ships with the KernelKnn package into the
# workspace and print a compact summary of its structure.
data('Boston', package = 'KernelKnn')

str(object = Boston)


## ---- eval=T------------------------------------------------------------------
# Predictors: every column except the last one, centred and scaled column-wise.
# Response: the last column of Boston.
X <- scale(Boston[, -ncol(Boston)])
y <- Boston[, ncol(Boston)]

# Random 75% / 25% split of the row indices into train and test.
# The seed is fixed so the split (and every result derived from it) is the
# same on each run; seq_along() is used instead of 1:length(y) so that an
# empty response yields an empty index vector rather than c(1, 0).
set.seed(3)
spl_train <- sample(seq_along(y), round(length(y) * 0.75))
spl_test <- setdiff(seq_along(y), spl_train)
str(spl_train)
str(spl_test)


# evaluation metric

# Mean squared error between observed and predicted values.
#
# y_true : numeric vector of observed values
# y_pred : numeric vector of predictions, compared element-wise with y_true
#
# Returns a single number: the average of the squared differences.
mse <- function(y_true, y_pred) {
  squared_error <- (y_true - y_pred)^2
  mean(squared_error)
}


## ---- eval=T------------------------------------------------------------------

library(KernelKnn)

# k-nearest-neighbour regression (k = 5, euclidean distance) fitted on the
# training rows and evaluated on the test rows. No kernel weighting function is
# supplied (weights_function = NULL).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
preds_TEST <- KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ],
                        y[spl_train], k = 5, method = 'euclidean',
                        weights_function = NULL, regression = TRUE)
str(preds_TEST)


## ---- eval=T------------------------------------------------------------------


# Number of distinct values in each column of Boston.
# vapply() iterates over the data frame's columns directly (apply() would first
# coerce the whole data frame to a matrix) and guarantees an integer result.
vapply(Boston, function(x) length(unique(x)), integer(1))


# Copy of the data in which 'chas' and 'rad' are stored as factors.
tmp_bst <- Boston
tmp_bst$chas <- as.factor(tmp_bst$chas)
tmp_bst$rad <- as.factor(tmp_bst$rad)

# Same k = 5 euclidean regression as before, but on the unscaled data with the
# factor columns present and transf_categ_cols switched on. The last column
# (the response) is dropped from both the train and the test predictors.
preds_TEST <- KernelKnn(tmp_bst[spl_train, -ncol(tmp_bst)],
                        TEST_data = tmp_bst[spl_test, -ncol(tmp_bst)],
                        y[spl_train], k = 5, method = 'euclidean',
                        regression = TRUE, transf_categ_cols = TRUE)
str(preds_TEST)


## ---- eval=T------------------------------------------------------------------


# Regression with k = 5, mahalanobis distance and the built-in 'biweight'
# kernel, fitted on the training rows and evaluated on the test rows.
# TRUE / FALSE are spelled out because T and F are reassignable variables.
preds_TEST_biw <- KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ],
                            y[spl_train], k = 5, method = 'mahalanobis',
                            weights_function = 'biweight',
                            regression = TRUE, transf_categ_cols = FALSE)
str(preds_TEST_biw)


## ---- eval=T------------------------------------------------------------------


# User-defined kernel: standard normal density, normalised per row.
#
# W : numeric matrix (rowSums() requires at least two dimensions); presumably
#     the matrix of neighbour distances handed over by KernelKnn - confirm
#     against the package documentation.
#
# Returns a matrix of the same shape in which every entry is the N(0, 1)
# density of the corresponding input, divided by the sum of its row.
norm_kernel <- function(W) {
  gauss_density <- dnorm(W, mean = 0, sd = 1)
  row_totals <- rowSums(gauss_density)
  gauss_density / row_totals
}


# Regression with k = 5 and mahalanobis distance, this time passing the
# user-defined function norm_kernel as the weights function instead of the
# name of a built-in kernel.
# TRUE / FALSE are spelled out because T and F are reassignable variables.
preds_TEST_norm <- KernelKnn(X[spl_train, ], TEST_data = X[spl_test, ],
                             y[spl_train], k = 5, method = 'mahalanobis',
                             weights_function = norm_kernel,
                             regression = TRUE, transf_categ_cols = FALSE)
str(preds_TEST_norm)


## ---- eval = T, echo = F------------------------------------------------------

 # Table of the two (k, method, kernel) parameter pairs that are
 # cross-validated next.
 knitr::kable(
   data.frame(
     k = c(9, 3),
     method = c('mahalanobis', 'canberra'),
     kernel = c('triweight', 'cosine')
   )
 )

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------

# 3-fold cross-validation on the full scaled data for the first parameter
# pair: k = 9, mahalanobis distance, 'triweight' kernel. Runs with 5 threads
# and a fixed seed (seed_num = 3).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fit_cv_pair1 <- KernelKnnCV(X, y, k = 9, folds = 3, method = 'mahalanobis',
                            weights_function = 'triweight', regression = TRUE,
                            threads = 5, seed_num = 3)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 3-fold cross-validation on the full scaled data for the second parameter
# pair: k = 3, canberra distance, 'cosine' kernel. Runs with 5 threads and a
# fixed seed (seed_num = 3).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fit_cv_pair2 <- KernelKnnCV(X, y, k = 3, folds = 3, method = 'canberra',
                            weights_function = 'cosine', regression = TRUE,
                            threads = 5, seed_num = 3)

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
str(fit_cv_pair2)


## ---- eval=T------------------------------------------------------------------
# Per-fold mean squared error for each cross-validated parameter pair.
# For fold i the response is indexed with folds[[i]] and compared against
# preds[[i]]. vapply() with numeric(1) guarantees a numeric vector with one
# value per fold, and seq_along() avoids the c(1, 0) sequence that 1:length()
# would produce for an empty list of predictions.
mse_pair1 <- vapply(seq_along(fit_cv_pair1$preds),
                    function(i) mse(y[fit_cv_pair1$folds[[i]]],
                                    fit_cv_pair1$preds[[i]]),
                    numeric(1))
mse_pair1

# Average of the per-fold errors.
cat('mse for params_pair1 is :', mean(mse_pair1), '\n')

mse_pair2 <- vapply(seq_along(fit_cv_pair2$preds),
                    function(i) mse(y[fit_cv_pair2$folds[[i]]],
                                    fit_cv_pair2$preds[[i]]),
                    numeric(1))
mse_pair2

cat('mse for params_pair2 is :', mean(mse_pair2), '\n')


## ---- eval = T, echo = F------------------------------------------------------

 # Table of the two (k, method, kernel) parameter pairs that are
 # cross-validated next; both use combined ('_MULT') kernels.
 knitr::kable(
   data.frame(
     k = c(19, 18),
     method = c('mahalanobis', 'mahalanobis'),
     kernel = c('triangular_triweight_MULT',
                'biweight_triweight_gaussian_MULT')
   )
 )

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------

# 3-fold cross-validation on the full scaled data for the first combined-kernel
# pair: k = 19, mahalanobis distance, 'triangular_triweight_MULT' kernel. Runs
# with 5 threads and a fixed seed (seed_num = 3). Overwrites the earlier
# fit_cv_pair1 object.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fit_cv_pair1 <- KernelKnnCV(X, y, k = 19, folds = 3, method = 'mahalanobis',
                            weights_function = 'triangular_triweight_MULT',
                            regression = TRUE, threads = 5, seed_num = 3)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair1)

## ---- eval=T, warning = FALSE, message = FALSE, results = 'hide'--------------
# 3-fold cross-validation on the full scaled data for the second combined-kernel
# pair: k = 18, mahalanobis distance, 'biweight_triweight_gaussian_MULT'
# kernel. Runs with 5 threads and a fixed seed (seed_num = 3). Overwrites the
# earlier fit_cv_pair2 object.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
fit_cv_pair2 <- KernelKnnCV(X, y, k = 18, folds = 3, method = 'mahalanobis',
                            weights_function = 'biweight_triweight_gaussian_MULT',
                            regression = TRUE, threads = 5, seed_num = 3)

## ---- eval=T------------------------------------------------------------------
str(fit_cv_pair2)


## ---- eval=T------------------------------------------------------------------
# Per-fold mean squared error for each of the two combined-kernel fits.
# For fold i the response is indexed with folds[[i]] and compared against
# preds[[i]]. vapply() with numeric(1) guarantees a numeric vector with one
# value per fold, and seq_along() avoids the c(1, 0) sequence that 1:length()
# would produce for an empty list of predictions.
mse_pair1 <- vapply(seq_along(fit_cv_pair1$preds),
                    function(i) mse(y[fit_cv_pair1$folds[[i]]],
                                    fit_cv_pair1$preds[[i]]),
                    numeric(1))
mse_pair1

# Average of the per-fold errors.
cat('mse for params_pair1 is :', mean(mse_pair1), '\n')

mse_pair2 <- vapply(seq_along(fit_cv_pair2$preds),
                    function(i) mse(y[fit_cv_pair2$folds[[i]]],
                                    fit_cv_pair2$preds[[i]]),
                    numeric(1))
mse_pair2

cat('mse for params_pair2 is :', mean(mse_pair2), '\n')

Try the KernelKnn package in your browser

Any scripts or data that you put into this service are public.

KernelKnn documentation built on Jan. 7, 2023, 1:18 a.m.