Description Usage Arguments Details Value Examples
View source: R/w_naive_bayes.R
Function for Naive Bayes algorithm classification with case weights.
w_naive_bayes(x_train, y_train, w = NULL, discretize = TRUE, breaks = 3)
w_gaussian_naive_bayes(x_train, y_train, w = NULL)
w_discrete_naive_bayes(x_train, y_train, breaks = 3, w = NULL)
|
x_train |
explanatory variables. |
y_train |
a factor class variable. |
w |
a vector of case weights. |
discretize |
If TRUE, w_discrete_naive_bayes is called; if FALSE, w_gaussian_naive_bayes is called. |
breaks |
number of break points for discretization. Ignored if discretize = FALSE. |
w_naive_bayes calls either w_gaussian_naive_bayes or w_discrete_naive_bayes,
depending on the discretize argument.
If discretize = FALSE, w_gaussian_naive_bayes is called. It uses Gaussian
densities with case weights and allows multiclass classification.
If discretize = TRUE, w_discrete_naive_bayes is called. It uses conditional
probabilities for each category with Laplace smoothing and allows multiclass
classification.
A w_naive_bayes object with the components listed below.
n_train |
Number of cases in the input dataset. |
p |
Number of explanatory variables. |
x_classes |
A list of datasets, which are the rows of x_train separated by class. |
n_classes |
Number of cases for each class in input dataset. |
k_classes |
Number of classes in class variable. |
priors |
Prior probabilities. |
class_names |
Names of classes in class variable. |
means |
Weighted mean estimations for each variable. |
stds |
Weighted standard deviation estimations for each variable. |
categories |
Labels for discretized variables. |
boundaries |
Upper and lower boundaries for discretization. |
ps |
Probabilities for each category of each variable. |
library(rbooster)
## short functions for cross-validation and data simulation
#' Stratified sampling of training indices.
#'
#' For every distinct value of `y`, draws a share of that class's positions
#' (without replacement), so each class contributes to the training set in
#' proportion to its size.
#'
#' @param y Vector of class labels.
#' @param train_proportion Fraction of each class to place in the training set.
#' @return Integer vector of positions in `y` selected for training.
cv_sampler <- function(y, train_proportion) {
  unlist(lapply(unique(y), function(m) {
    class_i <- which(y == m)
    # Round the product, not the count: the count is already a whole number,
    # and a fractional size handed to the sampler is cut down rather than
    # rounded to the nearest case.
    n_take <- round(length(class_i) * train_proportion)
    # Draw positions with sample.int() and index into class_i, so a class with
    # a single member is not interpreted by sample() as the range 1:class_i.
    class_i[sample.int(length(class_i), size = n_take)]
  }))
}
#' Simulate a labelled Gaussian dataset and split it into train and test sets.
#'
#' Builds `k` classes of `round(n / k)` rows each. Every class is filled with
#' normal draws (sd = 2) around its own mean; the class means are spread evenly
#' between 0 and `k * 1.5`. Classes are labelled with the first `k` lowercase
#' letters. Training rows are chosen by `cv_sampler()`.
#'
#' @param n Approximate total number of rows.
#' @param p Number of explanatory variables.
#' @param k Number of classes.
#' @param train_proportion Passed to `cv_sampler()` to choose training rows.
#' @return A list with `data` (all rows), `data_train` and `data_test`.
data_simulation <- function(n, p, k, train_proportion) {
  n_per_class <- round(n / k)
  class_means <- seq(0, k * 1.5, length.out = k)
  class_blocks <- lapply(class_means, function(m) {
    matrix(
      data = rnorm(n = n_per_class * p, mean = m, sd = 2),
      nrow = n_per_class
    )
  })
  x <- do.call(rbind, class_blocks)
  y <- factor(rep(letters[seq_len(k)], each = n_per_class))
  train_i <- cv_sampler(y, train_proportion)
  data <- data.frame(x, y = y)
  # Select test rows positively: data[-train_i, ] returns zero rows when
  # train_i is empty, instead of the whole dataset.
  test_i <- setdiff(seq_len(nrow(data)), train_i)
  list(
    data = data,
    data_train = data[train_i, ],
    data_test = data[test_i, ]
  )
}
### binary classification example
n <- 500
p <- 10
k <- 2
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# columns 1..p hold the explanatory variables, column p + 1 the class
x <- dat$data[, seq_len(p)]
y <- dat$data[, p + 1]
x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[, p + 1]
x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[, p + 1]

## discretized Naive Bayes classification
# through the general entry point ...
mm1 <- w_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  discretize = TRUE,
  breaks = 4
)
preds1 <- predict(mm1, newdata = x_test, type = "pred")
table(y_test, preds1)

# ... or by calling the discrete variant directly
mm2 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4
)
preds2 <- predict(mm2, newdata = x_test, type = "pred")
table(y_test, preds2)

## Gaussian Naive Bayes classification
# through the general entry point ...
mm3 <- w_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  discretize = FALSE
)
preds3 <- predict(mm3, newdata = x_test, type = "pred")
table(y_test, preds3)

# ... or by calling the Gaussian variant directly
mm4 <- w_gaussian_naive_bayes(x_train = x_train, y_train = y_train)
preds4 <- predict(mm4, newdata = x_test, type = "pred")
table(y_test, preds4)
## multiclass example
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# columns 1..p hold the explanatory variables, column p + 1 the class
x <- dat$data[, seq_len(p)]
y <- dat$data[, p + 1]
x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[, p + 1]
x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[, p + 1]

# discretized
mm5 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4
)
preds5 <- predict(mm5, newdata = x_test, type = "pred")
table(y_test, preds5)

# gaussian
mm6 <- w_gaussian_naive_bayes(x_train = x_train, y_train = y_train)
preds6 <- predict(mm6, newdata = x_test, type = "pred")
table(y_test, preds6)
## example for case weights
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# columns 1..p hold the explanatory variables, column p + 1 the class
x <- dat$data[, seq_len(p)]
y <- dat$data[, p + 1]
x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[, p + 1]
# Take the test split from THIS simulation; without these two lines the
# predictions below would be made on a test set left over from earlier code.
x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[, p + 1]

# discretized: classes "a" and "c" get full weight, all others 0.01
weights <- ifelse(y_train %in% c("a", "c"), 1, 0.01)
mm7 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4,
  w = weights
)
preds7 <- predict(object = mm7, newdata = x_test, type = "pred")
table(y_test, preds7)

# gaussian: classes "b" and "d" get full weight, all others 0.01
weights <- ifelse(y_train %in% c("b", "d"), 1, 0.01)
mm8 <- w_gaussian_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  w = weights
)
preds8 <- predict(object = mm8, newdata = x_test, type = "pred")
table(y_test, preds8)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.