Nothing
#' Boosting methods model
#'
#' This class contains the classification model obtained by the CDA method.
#' @name boosting-class
#' @slot models List of models.
#' @slot x The learning set.
#' @slot y The target values.
#' @exportClass boosting
#' @seealso \code{\link{ADABOOST}}, \code{\link{BAGGING}}, \code{\link{predict.boosting}}
setClass ("boosting",
representation (models = "list",
x = "data.frame",
y = "ANY"))
#' Canonical Disciminant Analysis model
#'
#' This class contains the classification model obtained by the CDA method.
#' @name cda-class
#' @slot proj The projection of the dataset into the canonical base. A \code{data.frame}.
#' @slot transform The transformation matrix between. A \code{matrix}.
#' @slot centers Coordinates of the class centers. A \code{matrix}.
#' @slot within The intr-class covarianc matrix. A \code{matrix}.
#' @slot eig The eigen-values. A \code{matrix}.
#' @slot dim The number of dimensions of the canonical base (numeric value).
#' @slot nb.classes The number of clusters (numeric value).
#' @slot train The training set (description). A \code{data.frame}.
#' @slot labels Class labels of the training set. Either a \code{factor} or an integer \code{vector}.
#' @slot model The prediction model.
#' @exportClass cda
#' @seealso \code{\link{CDA}}, \code{\link{plot.cda}}, \code{\link{predict.cda}}
setClass ("cda",
representation (proj = "data.frame",
transform = "matrix",
centers = "matrix",
within = "matrix",
eig = "matrix",
dim = "numeric",
nb.classes = "numeric",
train = "data.frame",
labels = "factor",
model = "ANY"))
#' Training set and test set
#'
#' This class contains a dataset divided into four parts: the training set and test set, description and class labels.
#' @name dataset-class
#' @slot train.x the training set (description), as a \code{data.frame} or a \code{matrix}.
#' @slot train.y the training set (target), as a \code{vector} or a \code{factor}.
#' @slot test.x the training set (description), as a \code{data.frame} or a \code{matrix}.
#' @slot test.y the training set (target), as a \code{vector} or a \code{factor}.
#' @exportClass dataset
#' @seealso \code{\link{splitdata}}
setClass ("dataset",
representation (train.x = "ANY",
train.y = "ANY",
test.x = "ANY",
test.y = "ANY"))
#' K Nearest Neighbours model
#'
#' This class contains the classification model obtained by the k-NN method.
#' @name knn-class
#' @slot train The training set (description). A \code{data.frame}.
#' @slot labels Class labels of the training set. Either a \code{factor} or an integer \code{vector}.
#' @slot k The \code{k} parameter.
#' @exportClass knn
#' @seealso \code{\link{KNN}}, \code{\link{predict.knn}}
setClass ("knn",
representation (train = "data.frame",
labels = "factor",
k = "numeric"))
#' Generic classification or regression model
#'
#' This is a wrapper class containing the classification model obtained by any classification or regression method.
#' @name model-class
#' @slot model The wrapped model.
#' @slot method The name of the method.
#' @exportClass model
#' @seealso \code{\link{predict.model}}, \code{\link[stats]{predict}}
setClass ("model",
representation (model = "ANY",
method = "character"))
#' Learning Parameters
#'
#' This class contains main parameters for various learning methods.
#' @name params-class
#' @slot decay The decay parameter.
#' @slot hidden The number of hidden nodes.
#' @slot epsilon The epsilon parameter.
#' @slot gamma The gamma parameter.
#' @slot cost The cost parameter.
#' @exportClass params
#' @seealso \code{\link{MLP}}, \code{\link{MLPREG}}, \code{\link{SVM}}, \code{\link{SVR}}
setClass ("params",
representation (decay = "numeric",
hidden = "numeric",
epsilon = "numeric",
gamma = "numeric",
cost = "numeric"))
#' @keywords internal
adaboost.m1 <-
function (x, y, learningmethod, nsamples, seed = NULL, ...)
{
set.seed (seed)
if (is.vector (x))
x = matrix (x, ncol = 1)
w = rep (1 / nrow (x), nrow (x))
epsilon = 0
models = NULL
iteration = 0
while ((epsilon < .5) & (iteration < nsamples))
{
iteration = iteration + 1
prob = w / sum (w)
s = sample (nrow (x), nrow (x), replace = TRUE, prob = prob)
xx = x [s, ]
yy = y [s]
model = learningmethod (xx, yy, ...)
model$boostx = xx
model$boosty = yy
model$boostprob = prob
rho = ifelse (predict (model, x) == y, 0, 1)
epsilon = sum (prob * rho)
if (epsilon > 0)
{
beta = epsilon / (1 - epsilon)
model$boostweight = log (1 / beta)
w = w * (beta^(1-rho))
if (beta < 1)
models = c (models, list (model))
}
}
res = list (models = models, x = x, y = y)
class (res) = "boosting"
return (res)
}
#' @keywords internal
adaboost.m2 <-
function (x, y, learningmethod, nsamples, seed = NULL, ...)
{
set.seed (seed)
if (is.vector (x))
x = matrix (x, ncol = 1)
Y = cbind (1:nrow (x), as.numeric (y))
k = nlevels (y)
D = rep (1, k * nrow (x))
w = matrix (D / (k - 1), ncol = k)
w [Y] = 0
epsilon = 0
models = NULL
iteration = 0
while ((epsilon < .5) & (iteration < nsamples))
{
iteration = iteration + 1
W = apply (w, 1, sum)
q = sweep (w, 1, W, "/")
D = W / sum (W)
s = sample (nrow (x), nrow (x), replace = TRUE, prob = D)
xx = x [s, ]
yy = y [s]
model = learningmethod (xx, yy, ...)
if (!is.null (model))
{
model$boostx = xx
model$boosty = yy
model$boostprob = D
pred = predict (model, x, fuzzy = TRUE)
if (!any (is.na (pred)))
{
rho = 1 - pred [Y] + apply (q * pred, 1, sum)
epsilon = .5 * sum (D * rho)
if (epsilon > 0)
{
beta = epsilon / (1 - epsilon)
model$boostweight = log (1 / beta)
exponent = .5 * (1 + sweep (-pred, 1, pred [Y], "+"))
w = w * beta^exponent
if (beta < 1)
models = c (models, list (model))
}
}
}
}
res = list (models = models, x = x, y = y)
class (res) = "boosting"
return (res)
}
#' Classification using AdaBoost
#'
#' Ensemble learning, through AdaBoost Algorithm.
#' @name ADABOOST
#' @param x The dataset (description/predictors), a \code{matrix} or \code{data.frame}.
#' @param y The target (class labels or numeric values), a \code{factor} or \code{vector}.
#' @param learningmethod The boosted method.
#' @param nsamples The number of samplings.
#' @param fuzzy Indicates whether or not fuzzy classification should be used or not.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param seed A specified seed for random number generation.
#' @param ... Other specific parameters for the leaning method.
#' @return The classification model.
#' @export
#' @seealso \code{\link{BAGGING}}, \code{\link{predict.boosting}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' ADABOOST (iris [, -5], iris [, 5], NB)
#' }
ADABOOST <-
function (x, y, learningmethod, nsamples = 100, fuzzy = FALSE, tune = FALSE, seed = NULL, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.factor (y))
{
if (fuzzy)
res = adaboost.m2 (x, y, learningmethod, nsamples, seed, ...)
else
res = adaboost.m1 (x, y, learningmethod, nsamples, seed, ...)
}
else
res = NULL
}
return (res)
}
#' Classification using Bagging
#'
#' Ensemble learning, through Bagging Algorithm.
#' @name BAGGING
#' @param x The dataset (description/predictors), a \code{matrix} or \code{data.frame}.
#' @param y The target (class labels or numeric values), a \code{factor} or \code{vector}.
#' @param learningmethod The boosted method.
#' @param nsamples The number of samplings.
#' @param bag.size The size of the samples.
#' @param seed A specified seed for random number generation.
#' @param ... Other specific parameters for the leaning method.
#' @return The classification model.
#' @export
#' @seealso \code{\link{ADABOOST}}, \code{\link{predict.boosting}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' BAGGING (iris [, -5], iris [, 5], NB)
#' }
BAGGING <-
function (x, y, learningmethod, nsamples = 100, bag.size = nrow (x), seed = NULL, ...)
{
set.seed (seed)
if (is.vector (x))
x = matrix (x, ncol = 1)
s = matrix (sample (nrow (x), nsamples * bag.size, replace = TRUE), ncol = nsamples)
models = apply (s, 2, function (v)
{
train = x [v, ]
target = y [v]
model = learningmethod (train, target, ...)
model$boostweight = 1
model$boostx = train
model$boosty = target
return (model)
})
res = list (models = models, x = x, y = y)
class (res) = "boosting"
return (res)
}
#' Classification using CART
#'
#' This function builds a classification model using CART.
#' @name CART
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param minsplit The minimum leaf size during the learning.
#' @param maxdepth Set the maximum depth of any node of the final tree, with the root node counted as depth 0.
#' @param cp The complexity parameter of the tree. Cross-validation is used to determine optimal cp if NULL.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link{cartdepth}}, \code{\link{cartinfo}}, \code{\link{cartleafs}}, \code{\link{cartnodes}}, \code{\link{cartplot}}, \code{\link[rpart]{rpart}}
#' @examples
#' require (datasets)
#' data (iris)
#' CART (iris [, -5], iris [, 5])
CART <-
function (train, labels, minsplit = 1, maxdepth = log2 (length (labels)), cp = NULL, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
{
train = matrix (train, ncol = 1)
colnames (train) = "X"
}
d = cbind.data.frame (Class = labels, as.data.frame (train))
complexity = cp
if (is.null (cp))
{
model = rpart::rpart (Class~., d, minsplit = minsplit, xval = nrow (d), maxdepth = maxdepth, maxcompete = 0, model = TRUE)
mini = which.min (model$cptable [, 4])
threshold = model$cptable [mini, 4] + model$cptable [mini, 5]
complexity = model$cptable [which (model$cptable [, 4] < threshold) [1], 1]
}
model = rpart::rpart (Class~., d, minsplit = minsplit, cp = complexity, maxdepth = maxdepth, maxcompete = 0, model = TRUE)
type = ifelse (is.factor (labels), "class", "reg")
res = list (model = model, method = "CART", type = type)
class (res) = "model"
}
return (res)
}
#' Depth
#'
#' Return the dept of a decision tree.
#' @name cartdepth
#' @param model The decision tree.
#' @return The depth.
#' @export
#' @seealso \code{\link{CART}}, \code{\link{cartinfo}}, \code{\link{cartleafs}}, \code{\link{cartnodes}}, \code{\link{cartplot}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CART (iris [, -5], iris [, 5])
#' cartdepth (model)
cartdepth <-
function (model)
{
return (ceiling (max (log (as.numeric (rownames (model$model$frame)), 2))) - 1)
}
#' CART information
#'
#' Return various information on a CART model.
#' @name cartinfo
#' @param model The decision tree.
#' @return Various information organized into a vector.
#' @export
#' @seealso \code{\link{CART}}, \code{\link{cartdepth}}, \code{\link{cartleafs}}, \code{\link{cartnodes}}, \code{\link{cartplot}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CART (iris [, -5], iris [, 5])
#' cartinfo (model)
cartinfo <-
function (model)
{
return (c (Nodes = cartnodes (model), Leafs = cartleafs (model), Depth = cartdepth (model)))
}
#' Number of Leafs
#'
#' Return the number of leafs of a decision tree.
#' @name cartleafs
#' @param model The decision tree.
#' @return The number of leafs.
#' @export
#' @seealso \code{\link{CART}}, \code{\link{cartdepth}}, \code{\link{cartinfo}}, \code{\link{cartnodes}}, \code{\link{cartplot}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CART (iris [, -5], iris [, 5])
#' cartleafs (model)
cartleafs <-
function (model)
{
return (sum (model$model$frame$var == "<leaf>"))
}
#' Number of Nodes
#'
#' Return the number of nodes of a decision tree.
#' @name cartnodes
#' @param model The decision tree.
#' @return The number of nodes.
#' @export
#' @seealso \code{\link{CART}}, \code{\link{cartdepth}}, \code{\link{cartinfo}}, \code{\link{cartleafs}}, \code{\link{cartplot}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CART (iris [, -5], iris [, 5])
#' cartnodes (model)
cartnodes <-
function (model)
{
return (length (model$model$frame$var))
}
#' CART Plot
#'
#' Plot a decision tree obtained by CART.
#' @name cartplot
#' @param model The decision tree.
#' @param ... Other parameters.
#' @export
#' @seealso \code{\link{CART}}, \code{\link{cartdepth}}, \code{\link{cartinfo}}, \code{\link{cartleafs}}, \code{\link{cartnodes}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CART (iris [, -5], iris [, 5])
#' cartplot (model)
cartplot <-
function (model, ...)
{
col = as.list (sort (unique (as.numeric (model$model$model$Class) + 1)))
rpart.plot::rpart.plot (model$model, box.palette = col, type = 0)
}
#' Classification using Canonical Discriminant Analysis
#'
#' This function builds a classification model using Canonical Discriminant Analysis.
#' @name CDA
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model, as an object of class \code{glmnet}.
#' @export
#' @seealso \code{\link{plot.cda}}, \code{\link{predict.cda}}, \code{\link{cda-class}}
#' @examples
#' require (datasets)
#' data (iris)
#' CDA (iris [, -5], iris [, 5])
CDA <-
function (train, labels, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else if (length (unique (labels)) == nlevels (labels))
{
ll = factor (labels)
m = scale (train, scale = FALSE)
n = nrow (m)
l = levels (ll)
k = nlevels (ll)
dim = min (k - 1, ncol (m))
V = t (m) %*% m / n
class = matrix (m [ll == l [1],], ncol = ncol (m))
g = apply (class, 2, mean)
B = nrow (class) * g %*% t (g)
for (i in 2:k)
{
class = matrix (m [ll == l [i],], ncol = ncol (m))
g = apply (class, 2, mean)
B = B + (nrow (class) * g %*% t (g))
}
B = B / n
s = eigen (solve (V) %*% B)
t = s$vectors [, 1:dim]
p = m %*% t
W = V - B
o = apply (matrix (m [ll == l [1], ], ncol = ncol (m)), 2, mean)
for (i in 2:k)
o = rbind (o, apply (matrix (m [ll == l [i], ], ncol = ncol (m)), 2, mean))
colnames (o) = colnames (train)
rownames (o) = l
if (dim > 1)
{
colnames (p) = paste ("Can.", 1:dim)
e = s$values [1:dim]
e = cbind (e, 100 * e^2 / sum (e^2))
e = cbind (e, cumsum (e [, 2]))
colnames (t) = paste ("Can.", 1:dim)
rownames (t) = colnames (train)
colnames (e) = c ("eigenvalue", "percentage of variance", "cumulative percentage of variance")
rownames (e) = paste ("Can.", 1:dim)
}
else
{
e = c (s$values [1], 100, 100)
names (t) = colnames (train)
names (e) = c ("eigenvalue", "percentage of variance", "cumulative percentage of variance")
}
prior = rep (1 / k, k)
if (is.vector (train))
train = matrix (train, ncol = 1)
model = MASS::lda (labels ~ ., as.data.frame (train), prior = prior)
res = list (proj = as.data.frame (Re (p)),
transform = Re (t),
centers = Re (o),
eig = Re (e),
within = Re (W),
dim = dim,
nb.classes = k,
train = as.data.frame (train),
labels = ll,
model = model)
class (res) = "cda"
}
else
message (paste ("Missing classe(s):", levels (labels) [table (labels) == 0]))
return (res)
}
#' @keywords internal
cda.transform <-
function (model, newdata) t (t (newdata) - apply (model$train, 2, mean)) %*% model$transform
#' Confuion matrix
#'
#' Plot a confusion matrix.
#' @name confusion
#' @param predictions The prediction.
#' @param gt The ground truth.
#' @param norm Whether or not the confusion matrix is normalized
#' @param graph Whether or not a graphic is displayed.
#' @return The confusion matrix.
#' @export
#' @seealso \code{\link{evaluation}}, \code{\link{performance}}, \code{\link{splitdata}}
#' @examples
#' require ("datasets")
#' data (iris)
#' d = splitdata (iris, 5)
#' model = NB (d$train.x, d$train.y)
#' pred = predict (model, d$test.x)
#' confusion (d$test.y, pred)
confusion <-
function (predictions, gt, norm = TRUE, graph = TRUE)
{
graphics::layout (matrix (1:2, ncol = 2), width = c (2, 1), height = c (1, 1))
on.exit (graphics::layout (1))
conf = table (gt, predictions, dnn = c ("True lables", "Predicted labels"))
color = NULL
maxval = 1
if (norm)
{
conf = sweep (conf, 1, rowSums (conf), "/")
color = (conf * 100) + 1
} else {
color = round (100 * conf / max (conf)) + 1
maxval = max (conf)
}
if (graph)
{
palette = grDevices::colorRampPalette (c ("#FAFAFF", "blue")) (101)
graphics::plot (c (0, ncol (conf) + 1), c (0, nrow (conf) + 1), col = 0,
xlim = c (1, ncol (conf) + 1), ylim = c (1, nrow (conf) + 1),
xaxs = "i", yaxs = "i", xlab = "", ylab = "",
asp = 1, axes = FALSE,
main = "Confusion matrix")
for (rrow in 1:nrow (conf))
for (col in 1:ncol (conf))
{
row = 1 + nrow (conf) - rrow
graphics::polygon (x = c (col, col, col + 1, col + 1), y = c (row, row + 1, row + 1, row),
col = palette [color [rrow, col]], border = FALSE)
graphics::text (col + .5, row + .5, round (conf [rrow, col], 2))
}
cex = min (5, .15 / log10 (1 + max (graphics::strwidth (c (levels (gt), levels (predictions))))))
graphics::polygon (x = c (1, 1, ncol (conf) + 1, ncol (conf) + 1), y = c (1, nrow (conf) + 1, nrow (conf) + 1, 1))
graphics::axis (side = 1, at = seq (1.5, by = 1, length.out = ncol (conf)), lwd = 0, lwd.ticks = 1,
labels = levels (gt), pos = 1, cex.axis = cex)
graphics::axis (side = 2, at = seq (1.5, by = 1, length.out = nrow (conf)), lwd = 0, lwd.ticks = 1,
labels = rev (levels (predictions)), pos = 1, cex.axis = cex)
graphics::title (xlab = "Predicted labels")
graphics::mtext ("True labels", side = 2, line = 3)
raster = grDevices::as.raster (matrix (rev (palette), ncol = 1))
graphics::plot (c (0, 3), c (0, 1), type = 'n', axes = F, xlab = '', ylab = '')
graphics::rasterImage (raster, 0, 0, 1, 1)
labels = NULL
if (maxval == 1)
labels = seq (0, 1, l = 5)
else
labels = round (seq (0, maxval, l = 5))
graphics::axis (side = 4, at = seq (0, 1, l = 5), lwd = 0, lwd.ticks = 1,
labels = labels, pos = 1, cex.axis = 1, las=2)
}
return (conf)
}
#' Plot Cost Curves
#'
#' This function plots Cost Curves of several classification predictions.
#' @name cost.curves
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt Actual labels of the dataset (\code{factor} or \code{vector}).
#' @param methods.names The name of the compared methods (\code{vector}).
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{roc.curves}}, \code{\link{performance}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' model.nb = NB (d [, -5], d [, 5])
#' model.lda = LDA (d [, -5], d [, 5])
#' pred.nb = predict (model.nb, d [, -5])
#' pred.lda = predict (model.lda, d [, -5])
#' cost.curves (cbind (pred.nb, pred.lda), d [, 5], c ("NB", "LDA"))
cost.curves <-
function (predictions, gt, methods.names = NULL)
{
if (is.factor (predictions))
{
pred = ROCR::prediction (as.numeric (predictions), as.numeric (gt))
perf = ROCR::performance (pred, "ecost")
ROCR::plot (perf, xlab = "", ylab = "Error")
}
else
{
pred = ROCR::prediction (as.numeric (predictions [, 1]), as.numeric (gt))
perf = ROCR::performance (pred, "ecost")
ROCR::plot (perf, xlab = "", ylab = "Error")
for (i in 2:ncol (predictions))
{
pred = ROCR::prediction (as.numeric (predictions [, i]), as.numeric (gt))
perf = ROCR::performance (pred, "ecost")
ROCR::plot (perf, add = TRUE, lty = i, col = i)
}
if (!is.null (methods.names))
graphics::legend ("topleft", methods.names, lty = 1:ncol (predictions), col = 1:ncol (predictions), bty = "n")
}
}
#' @keywords internal
emptyparams <-
function ()
{
res = list ()
class (res) = "params"
return (res)
}
#' @keywords internal
eval.accuracy <-
function (predictions, gt, precision, recall, ...)
{
p = as.numeric (predictions)
l = as.numeric (gt)
sum (diag (table (p, l))) / length (l)
}
#' @keywords internal
eval.fmeasure <-
function (predictions, gt, precision = NULL, recall = NULL, beta = 1, positive = levels (gt) [1], ...)
{
if (is.null (precision))
precision = evaluation.precision (predictions, gt, positive)
if (is.null (recall))
recall = evaluation.recall (predictions, gt, positive)
res = (1 + beta * beta) * precision * recall / (beta * beta * precision + recall)
return (res)
}
#' @keywords internal
eval.fowlkesmallows <-
function (predictions, gt, precision = NULL, recall = NULL, positive = levels (gt) [1], ...)
{
if (is.null (precision))
precision = evaluation.precision (predictions, gt, positive)
if (is.null (recall))
recall = evaluation.recall (predictions, gt, positive)
res = sqrt (precision * recall)
return (res)
}
#' @keywords internal
eval.goodness <-
function (predictions, gt, beta = 1, precision = NULL, recall = NULL, positive = levels (gt) [1], ...)
{
if (is.null (precision))
precision = evaluation.precision (predictions, gt, positive)
if (is.null (recall))
recall = evaluation.recall (predictions, gt, positive)
res = (beta * precision + recall) / (beta + 1)
return (res)
}
#' @keywords internal
eval.jaccard <-
function (predictions, gt, precision = NULL, recall = NULL, positive = levels (gt) [1], ...)
{
if (is.null (precision))
precision = evaluation.precision (predictions, gt, positive)
if (is.null (recall))
recall = evaluation.recall (predictions, gt, positive)
pr = precision * recall
res = pr / (precision + recall - pr)
return (res)
}
#' @keywords internal
eval.kappa <-
function (predictions, gt, precision = NULL, recall = NULL, ...)
{
p = as.numeric (predictions)
l = as.numeric (gt)
irr::kappa2 (cbind (p, l), weight = "equal")$value
}
#' @keywords internal
eval.precision <-
function (predictions, gt, precision, recall, positive = levels (gt) [1], ...)
{
return (precision)
}
#' @keywords internal
eval.recall <-
function (predictions, gt, precision, recall, positive = levels (gt) [1], ...)
{
return (recall)
}
#' Evaluation of classification or regression predictions
#'
#' Evaluation predictions of a classification or a regression model.
#' @name evaluation
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth of the dataset (\code{factor} or \code{vector}).
#' @param eval The evaluation method.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{confusion}}, \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation.msep}}, \code{\link{evaluation.r2}}, \code{\link{performance}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' # Default evaluation for classification
#' evaluation (pred.nb, d$test.y)
#' # Evaluation with two criteria
#' evaluation (pred.nb, d$test.y, eval = c ("accuracy", "kappa"))
#' data (trees)
#' d = splitdata (trees, 3)
#' model.linreg = LINREG (d$train.x, d$train.y)
#' pred.linreg = predict (model.linreg, d$test.x)
#' # Default evaluation for regression
#' evaluation (pred.linreg, d$test.y)
evaluation <-
function (predictions, gt, eval = ifelse (is.factor (gt), "accuracy", "r2"), ...)
{
precision = NULL
recall = NULL
if (is.factor (gt) & nlevels (gt) == 2)
{
precision = evaluation.precision (predictions = predictions, gt = gt, ...)
recall = evaluation.recall (predictions, gt, ...)
}
res = NULL
for (e in eval)
{
tmp = get (paste ("eval", e, sep = ".")) (predictions = predictions, gt = gt, precision = precision, recall = recall, ...)
res = c (res, tmp)
}
if (!is.null (res))
names (res) = eval
return (res)
}
#' Accuracy of classification predictions
#'
#' Evaluation predictions of a classification model according to accuracy.
#' @name evaluation.accuracy
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @return The evaluation of the predictions (numeric value).
#' @param ... Other parameters.
#' @export
#' @seealso \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.accuracy (pred.nb, d$test.y)
evaluation.accuracy <-
function (predictions, gt, ...)
{
return (eval.accuracy (predictions, gt, NULL, NULL))
}
#' F-measure
#'
#' Evaluation predictions of a classification model according to the F-measure index.
#' @name evaluation.fmeasure
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param beta The weight given to precision.
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.fmeasure (pred.nb, d$test.y)
evaluation.fmeasure <-
function (predictions, gt, beta = 1, positive = levels (gt) [1], ...)
{
return (eval.fmeasure (predictions, gt, beta = beta, positive = positive))
}
#' Fowlkes–Mallows index
#'
#' Evaluation predictions of a classification model according to the Fowlkes–Mallows index.
#' @name evaluation.fowlkesmallows
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.fowlkesmallows (pred.nb, d$test.y)
evaluation.fowlkesmallows <-
function (predictions, gt, positive = levels (gt) [1], ...)
{
return (eval.fowlkesmallows (predictions, gt, positive = positive))
}
#' Goodness
#'
#' Evaluation predictions of a classification model according to Goodness index.
#' @name evaluation.goodness
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param beta The weight given to precision.
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.goodness (pred.nb, d$test.y)
evaluation.goodness <-
function (predictions, gt, beta = 1, positive = levels (gt) [1], ...)
{
return (eval.goodness (predictions, gt, beta = beta, positive = positive))
}
#' Jaccard index
#'
#' Evaluation predictions of a classification model according to Jaccard index.
#' @name evaluation.jaccard
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.jaccard (pred.nb, d$test.y)
evaluation.jaccard <-
function (predictions, gt, positive = levels (gt) [1], ...)
{
return (eval.fmeasure (predictions, gt, positive = positive))
}
#' Kappa evaluation of classification predictions
#'
#' Evaluation predictions of a classification model according to kappa.
#' @name evaluation.kappa
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}}, \code{\link{evaluation.precision}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation.recall}},
#' \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.kappa (pred.nb, d$test.y)
evaluation.kappa <-
function (predictions, gt, ...)
{
return (eval.kappa (predictions, gt))
}
#' Precision of classification predictions
#'
#' Evaluation predictions of a classification model according to precision. Works only for two classes problems.
#' @name evaluation.precision
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}},
#' \code{\link{evaluation.recall}},\code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.precision (pred.nb, d$test.y)
evaluation.precision <-
function (predictions, gt, positive = levels (gt) [1], ...)
{
if (nlevels (gt) != 2)
stop ("evaluation.precision only works on data with two classes in the current implementation")
t = table (gt, predictions)
return (t [positive, positive] / sum (t [positive, ]))
}
#' Recall of classification predictions
#'
#' Evaluation predictions of a classification model according to recall. Works only for two classes problems.
#' @name evaluation.recall
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt The ground truth (\code{factor} or \code{vector}).
#' @param positive The label of the positive class.
#' @param ... Other parameters.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{evaluation.accuracy}}, \code{\link{evaluation.fmeasure}}, \code{\link{evaluation.fowlkesmallows}}, \code{\link{evaluation.goodness}}, \code{\link{evaluation.jaccard}}, \code{\link{evaluation.kappa}},
#' \code{\link{evaluation.precision}}, \code{\link{evaluation}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' d = splitdata (d, 5)
#' model.nb = NB (d$train.x, d$train.y)
#' pred.nb = predict (model.nb, d$test.x)
#' evaluation.recall (pred.nb, d$test.y)
evaluation.recall <-
function (predictions, gt, positive = levels (gt) [1], ...)
{
if (nlevels (gt) != 2)
stop ("evaluation.precision only works on data with two classes in the current implementation")
t = table (gt, predictions)
return (t [positive, positive] / sum (t [, positive]))
}
#' Classification using Gradient Boosting
#'
#' This function builds a classification model using Gradient Boosting
#' @name GRADIENTBOOSTING
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param ntree The number of trees in the forest.
#' @param learningrate The learning rate (between 0 and 1).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[xgboost]{xgboost}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' GRADIENTBOOSTING (iris [, -5], iris [, 5])
#' }
GRADIENTBOOSTING <-
function (train, labels,
ntree = 500,
learningrate = 0.3,
tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
l = as.numeric (labels) - 1
k = nlevels (labels)
model = xgboost::xgboost (data = as.matrix (train), label = l, nrounds = ntree, objective = "multi:softprob", eval_metric = "mlogloss", num_class = k, verbose = 0)
res = list (model = model, lev = levels (labels), method = "XGB")
class (res) = "model"
}
return (res)
}
#' Classification using k-NN
#'
#' This function builds a classification model using Logistic Regression.
#' @name KNN
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param k The k parameter.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[class]{knn}}
#' @examples
#' require (datasets)
#' data (iris)
#' KNN (iris [, -5], iris [, 5])
KNN <-
function (train, labels, k = 1:10, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
train = matrix (train, ncol = 1)
kk = k [1]
if (is.vector (k) && (length (k) > 1))
{
tunecontrol = e1071::tune.control (sampling = "bootstrap", nboot = 20, boot.size = 1)
kk = e1071::tune.knn (train, labels, k = k, tunecontrol = tunecontrol)$best.model$k
}
res = list (train = train, labels = labels, k = kk)
class (res) = "knn"
}
return (res)
}
#' Classification using Linear Discriminant Analysis
#'
#' This function builds a classification model using Linear Discriminant Analysis.
#' @name LDA
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[MASS]{lda}}
#' @examples
#' require (datasets)
#' data (iris)
#' LDA (iris [, -5], iris [, 5])
LDA <-
function (train, labels, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
train = matrix (train, ncol = 1)
model = MASS::lda (x = train, grouping = labels)
res = list (model = model, method = "LDA")
class (res) = "model"
}
return (res)
}
#' Classification using Logistic Regression
#'
#' This function builds a classification model using Logistic Regression.
#' @name LR
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[nnet]{multinom}}
#' @examples
#' require (datasets)
#' data (iris)
#' LR (iris [, -5], iris [, 5])
LR <-
function (train, labels, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else if (length (unique (labels)) == nlevels (labels))
{
if (is.vector (train))
{
train = matrix (train, ncol = 1)
colnames (train) = "X"
}
data = cbind.data.frame (train, Class = labels)
model = nnet::multinom (formula = Class~., data, trace = FALSE)
res = list (model = model, method = "LR")
class (res) = "model"
}
else
message (paste ("Missing classes:", levels (labels) [table (labels) == 0]))
return (res)
}
#' Classification using Multilayer Perceptron
#'
#' This function builds a classification model using Multilayer Perceptron.
#' @name MLP
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param hidden The size of the hidden layer (if a vector, cross-over validation is used to chose the best size).
#' @param decay The decay (between 0 and 1) of the backpropagation algorithm (if a vector, cross-over validation is used to chose the best size).
#' @param methodparameters Object containing the parameters. If given, it replaces \code{size} and \code{decay}.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[nnet]{nnet}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' MLP (iris [, -5], iris [, 5], hidden = 4, decay = .1)
#' }
MLP <-
function (train,
labels,
hidden = ifelse (is.vector (train), 2:(1 + nlevels (labels)), 2:(ncol (train) + nlevels (labels))),
decay = 10^(-3:-1),
methodparameters = NULL,
tune = FALSE,
...)
{
model = NULL
if (is.vector (train))
train = data.frame (X = train)
d = cbind.data.frame (Class = labels, train)
if (!is.null (methodparameters))
{
hidden = methodparameters$hidden
decay = methodparameters$decay
}
if (length (hidden) > 1 | length (decay) > 1)
{
tunecontrol = e1071::tune.control(sampling = "bootstrap", nboot = 20, boot.size = 1)
model = e1071::tune.nnet (Class~., data = d, size = hidden, decay = decay,
tunecontrol = tunecontrol, ...)$best.model
}
else
model = nnet::nnet (Class~., data = d, size = hidden, decay = decay, trace = FALSE, ...)
res = NULL
if (tune)
{
res = list (decay = model$decay, hidden = model$n [2])
class (res) = "params"
}
else
{
res = list (model = model, method = "MLP")
class (res) = "model"
}
return (res)
}
#' Classification using Naive Bayes
#'
#' This function builds a classification model using Naive Bayes.
#' @name NB
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[e1071]{naiveBayes}}
#' @examples
#' require (datasets)
#' data (iris)
#' NB (iris [, -5], iris [, 5])
NB <-
function (train, labels, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
train = matrix (train, ncol = 1)
model = e1071::naiveBayes (train, labels)
res = list (model = model, method = "NB")
class (res) = "model"
}
return (res)
}
#' @keywords internal
panel.compare <-
function (x, y, ...)
{
graphics::points (x, y, col = "red")
graphics::abline (a = 0, b = 1, col = "blue")
}
#' Performance estimation
#'
#' Estimate the performance of classification or regression methods using bootstrap or crossvalidation (accuracy, ROC curves, confusion matrices, ...)
#' @name performance
#' @param methods The classification or regression methods to be evaluated.
#' @param train.x The dataset (description/predictors), a \code{matrix} or \code{data.frame}.
#' @param train.y The target (class labels or numeric values), a \code{factor} or \code{vector}.
#' @param test.x The test dataset (description/predictors), a \code{matrix} or \code{data.frame}.
#' @param test.y The (test) target (class labels or numeric values), a \code{factor} or \code{vector}.
#' @param train.size The size of the training set (holdout estimation).
#' @param type The type of evaluation (confusion matrix, ROC curve, ...)
#' @param protocol The evaluation protocol (crossvalidation, bootstrap, ...)
#' @param eval The evaluation functions.
#' @param nruns The number of bootstrap runs.
#' @param nfolds The number of folds (crossvalidation estimation).
#' @param new A logical value indicating whether a new plot should be be created or not (cost curves or ROC curves).
#' @param lty The line type (and color) specified as an integer (cost curves or ROC curves).
#' @param methodparameters Method parameters (if null tuning is done by cross-validation).
#' @param names Method names.
#' @param seed A specified seed for random number generation (useful for testing different method with the same bootstap samplings).
#' @param ... Other specific parameters for the leaning method.
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{confusion}}, \code{\link{evaluation}}, \code{\link{cost.curves}}, \code{\link{roc.curves}}
#' @examples
#' \dontrun{
#' require ("datasets")
#' data (iris)
#' # One method, one evaluation criterion, bootstrap estimation
#' performance (NB, iris [, -5], iris [, 5], seed = 0)
#' # One method, two evaluation criteria, train set estimation
#' performance (NB, iris [, -5], iris [, 5], eval = c ("accuracy", "kappa"),
#' protocol = "train", seed = 0)
#' # Three methods, ROC curves, LOOCV estimation
#' performance (c (NB, LDA, LR), linsep [, -3], linsep [, 3], type = "roc",
#' protocol = "loocv", seed = 0)
#' # List of methods in a variable, confusion matrix, hodout estimation
#' classif = c (NB, LDA, LR)
#' performance (classif, iris [, -5], iris [, 5], type = "confusion",
#' protocol = "holdout", seed = 0, names = c ("NB", "LDA", "LR"))
#' # List of strings (method names), scatterplot evaluation, crossvalidation estimation
#' classif = c ("NB", "LDA", "LR")
#' performance (classif, iris [, -5], iris [, 5], type = "scatter",
#' protocol = "crossvalidation", seed = 0)
#' # Actual vs. predicted
#' data (trees)
#' performance (LINREG, trees [, -3], trees [, 3], type = "avsp")
#' }
performance <-
function (methods, train.x, train.y, test.x = NULL, test.y = NULL, train.size = round (0.7 * nrow (train.x)), type = c ("evaluation", "confusion", "roc", "cost", "scatter", "avsp"),
protocol = c ("bootstrap", "crossvalidation", "loocv", "holdout", "train"),
eval = ifelse (is.factor (train.y), "accuracy", "r2"),
nruns = 10, nfolds = 10, new = TRUE, lty = 1,
seed = NULL, methodparameters = NULL, names = NULL, ...)
{
if (length (train.y) == 1)
{
index = train.y
train.y = train.x [, index]
train.x = train.x [, -index]
}
methodNames = names
if (is.character (methods))
{
methodNames = methods
methods = sapply (methods, get)
}
else
{
if (is.null (names))
{
methodNames = as.character (match.call ()$methods)
if (length (methodNames) > 1)
methodNames = methodNames [-1]
if (length (methodNames) != length (methods))
methodNames = NULL
}
}
if (is.vector (train.x))
train.x = data.frame (X = train.x)
set.seed (seed)
if (is.null (methodparameters))
{
if (length (methods) == 1)
methodparameters = methods (train.x, train.y, tune = TRUE, ...)
else
methodparameters = sapply (methods, function (method) method (train.x, train.y, tune = TRUE, ...))
}
tmp = get (paste ("protocol.", protocol [1], sep = "")) (methods = methods, train.x = train.x, train.y = train.y, test.x = test.x, test.y = test.y, train.size = train.size,
methodparameters = methodparameters, nruns = nruns, nfolds = nfolds, seed = seed, ...)
predictions = tmp$predictions
targets = tmp$targets
if (type [1] %in% c ("evaluation", "confusion", "roc", "cost", "avsp"))
{
if (length (methods) == 1)
predictions = unlist (predictions)
else
predictions = do.call ("rbind", predictions)
predictions = unlist (predictions)
targets = unlist (targets)
if (is.factor (train.y))
{
lab = levels (train.y)
l1 = length (lab)
if (length (methods) == 1)
{
l2 = length (unique (predictions))
if (l2 > l1)
lab = c (lab, rep ("Unknown", l2 - l1))
lab = lab [sort (unique (predictions))]
predictions = factor (predictions, labels = lab)
}
else
{
predictions = as.data.frame (predictions)
predictions = lapply (predictions, function (column) {
l2 = length (unique (column))
lab2 = lab
if (l2 > l1)
lab2 = c (lab, rep ("Unknown", l2 - l1))
lab2 = lab2 [sort (unique (column))]
return (factor (column, labels = lab2))
})
}
targets = factor (targets, labels = levels (train.y))
}
if (length (methods) > 1)
predictions = as.data.frame (predictions)
}
if (type [1] == "evaluation")
{
res = NULL
if (length (methods) == 1)
res = evaluation (predictions = predictions, gt = targets, eval = eval, ncol = ncol (train.x), ...)
else
{
res = t (as.data.frame (lapply (predictions, function (column) evaluation (predictions = column, gt = targets, eval = eval, ncol = ncol (train.x), ...))))
rownames (res) = methodNames
}
return (res)
}
else if (type [1] == "confusion")
{
res = NULL
if (length (methods) == 1)
res = confusion (targets, predictions, ...)
else
{
for (prediction in predictions)
res = c (res, list (confusion (targets, prediction, ...)))
names (res) = methodNames
}
return (res)
}
else if (type [1] == "avsp")
plotavsp (predictions, targets)
else if (type [1] == "roc")
roc.curves (predictions, targets, methodNames)
else if (type [1] == "cost")
cost.curves (predictions, targets, methodNames)
else if (type [1] == "scatter")
{
res = array (dim = c (length (targets), length (methods), length (eval)))
for (i in 1:length (targets))
{
gt = targets [[i]]
pred = predictions [[i]]
if (is.factor (train.y))
{
lab = levels (train.y)
l1 = length (lab)
if (length (methods) == 1)
{
l2 = length (unique (pred))
if (l2 > l1)
lab = c (lab, rep ("Unknown", l2 - l1))
lab = lab [sort (unique (pred))]
pred = factor (pred, labels = lab)
}
else
{
pred = as.data.frame (pred)
pred = lapply (pred, function (column) {
l2 = length (unique (column))
lab2 = lab
if (l2 > l1)
lab2 = c (lab, rep ("Unknown", l2 - l1))
lab2 = lab2 [sort (unique (column))]
return (factor (column, labels = lab2))
})
pred = as.data.frame (pred)
}
gt = factor (gt, labels = levels (train.y))
if (length (methods) == 1)
res [i, , ] = as.matrix (evaluation (predictions = pred, gt = gt, eval = eval, ncol = ncol (train.x), ...))
else
res [i, , ] = as.matrix (t (as.data.frame (lapply (pred, function (column) evaluation (predictions = column, gt = gt, eval = eval, ncol = ncol (train.x), ...)))))
}
}
for (i in 1:dim (res) [3])
{
if (dim (res) [2] == 1)
graphics::plot (sort (res [, 1, i]), col = "red", xlab = "", ylab = dimnames (res) [[3]][i], main = dimnames (res) [[2]][1])
else if (dim (res) [2] == 2)
{
dd = matrix (res [, 1:2, i], nrow = dim (res) [1])
colnames (dd) = methodNames
lim = c (min (dd), max (dd))
graphics::plot (dd, asp = 1, xlim = lim, ylim = lim, col = "red", main = dimnames (res) [[3]][i])
graphics::abline (a = 0, b = 1, col = "blue")
}
else
{
dd = matrix (res [, , i], nrow = dim (res) [1])
colnames (dd) = methodNames
lim = c (min (dd), max (dd))
graphics::pairs (dd, upper.panel = panel.compare, lower.panel = NULL, asp = 1, xlim = lim, ylim = lim)
}
}
}
else
message ("Unknown evaluation")
}
#' Plot function for cda-class
#'
#' Plot the learning set (and test set) on the canonical axes obtained by Canonical Discriminant Analysis (function \code{CDA}).
#' @name plot.cda
#' @param x The classification model (object of class \code{cda-class}).
#' @param newdata The test set (\code{matrix} or \code{data.frame}).
#' @param axes The canonical axes to be printed (numeric \code{vector}).
#' @param ... Other parameters.
#' @method plot cda
#' @export
#' @seealso \code{\link{CDA}}, \code{\link{predict.cda}}, \code{\link{cda-class}}
#' @examples
#' require (datasets)
#' data (iris)
#' model = CDA (iris [, -5], iris [, 5])
#' plot (model)
plot.cda <-
function (x, newdata = NULL, axes = 1:2, ...)
{
if (x$dim > 1)
{
graphics::layout (rbind (1, 2), heights = c (1, 7))
X = x$proj
col = x$labels
n = nrow (X)
pch = rep (1, n)
mar = graphics::par ()$mar
mar [3] = 1
opar = graphics::par (mar = c (0, 4, 0, 3))
on.exit (graphics::par (opar))
graphics::plot.new ()
labels = c ("Training set", "Class centers")
lpch = c (1, 19)
if (!is.null (newdata))
{
X = rbind (X, cda.transform (x, newdata))
col = c (col, predict.cda (x, newdata))
m = nrow (newdata)
pch = c (pch, rep (3, m))
labels = c (labels, "Test set")
lpch = c (lpch, 3)
}
graphics::legend ("topright", labels, pch = lpch, bty = "n")
graphics::legend ("topleft", levels (x$labels), col = 2:(nlevels (x$labels) + 1), lty = 1, bty = "n")
xlab = paste (colnames (X) [axes [1]], " (", round (x$eig [axes [1], 2], 2), " %)", sep = "")
ylab = paste (colnames (X) [axes [2]], " (", round (x$eig [axes [2], 2], 2), " %)", sep = "")
graphics::par (mar = mar)
graphics::plot (X [, axes], col = unclass (col) + 1, pch = pch, xlab = xlab, ylab = ylab, asp = 1)
for (i in 1:nlevels (x$labels)) graphics::points (t (apply(x$proj [x$labels == levels (x$labels) [i], axes], 2, mean)), col = i + 1, pch = 19)
}
else
{
graphics::layout (rbind (1, 2), heights = c (1, 7))
X = Re (x$proj [, 1])
col = x$labels
n = length (X)
pch = rep (1, n)
mar = graphics::par ()$mar
mar [3] = 1
opar = graphics::par (mar = c (0, 4, 0, 3))
on.exit (graphics::par (opar))
graphics::plot.new()
labels = c ("Training set", "Class centers")
lpch = c (1, 19)
if (!is.null (newdata))
{
X = c (X, Re (cda.transform (x, newdata)))
col = c (col, stats::predict (x, newdata))
m = nrow (newdata)
pch = c (pch, rep (3, m))
labels = c (labels, "Test set")
lpch = c (lpch, 3)
}
graphics::legend ("topright", labels, pch = lpch, bty = "n")
graphics::legend ("topleft", levels (x$labels), col = 2:(nlevels (x$labels) + 1), lty = 1, bty = "n")
xlab = "Index"
ylab = "Can. 1"
graphics::par (mar = mar)
X = cbind (1:n, X)
xx = X [1:n, ]
graphics::plot (X, col = unclass (col) + 1, pch = pch, xlab = xlab, ylab = ylab)
for (i in 1:nlevels (x$labels)) graphics::points (t (apply(xx [x$labels == levels (x$labels) [i], ], 2, mean)), col = i + 1, pch = 19)
}
graphics::layout (1)
}
#' Model predictions
#'
#' This function predicts values based upon a model trained by a boosting method.
#' @name predict.boosting
#' @param object The classification model (of class \code{\link{boosting-class}}, created by \code{\link{ADABOOST}} or \code{\link{BAGGING}}).
#' @param test The test set (a \code{data.frame})
#' @param fuzzy A boolean indicating whether fuzzy classification is used or not.
#' @return A vector of predicted values (\code{factor}).
#' @param ... Other parameters.
#' @export
#' @method predict boosting
#' @seealso \code{\link{ADABOOST}}, \code{\link{BAGGING}}, \code{\link{boosting-class}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model = BAGGING (d$train.x, d$train.y, NB)
#' predict (model, d$test.x)
#' model = ADABOOST (d$train.x, d$train.y, NB)
#' predict (model, d$test.x)
#' }
predict.boosting <- function (object, test, fuzzy = FALSE, ...)
{
pred = lapply (object$models, function (model) predict (model, test, fuzzy, ...))
weights = sapply (object$models, function (model) model$boostweight)
res = NULL
if (is.factor (object$y))
{
if (fuzzy)
res = apply (simplify2array (pred), 1:2, mean)
else
{
pred = sapply (pred, function (v) v)
pred = apply (pred, 1, function (v) names (which.max (questionr::wtd.table (v, weights = weights))))
labels = levels (object$y)
pred = c (labels, pred)
res = factor (pred, levels = labels) [-(1:(length (labels)))]
}
}
else
{
pred = sapply (pred, function (v) v)
res = rowMeans (pred)
}
return (res)
}
#' Model predictions
#'
#' This function predicts values based upon a model trained by \code{\link{CDA}}.
#' @name predict.cda
#' @param object The classification model (of class \code{\link{cda-class}}, created by \code{\link{CDA}}).
#' @param test The test set (a \code{data.frame})
#' @param fuzzy A boolean indicating whether fuzzy classification is used or not.
#' @return A vector of predicted values (\code{factor}).
#' @param ... Other parameters.
#' @export
#' @method predict cda
#' @seealso \code{\link{CDA}}, \code{\link{plot.cda}}, \code{\link{cda-class}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model = CDA (d$train.x, d$train.y)
#' predict (model, d$test.x)
predict.cda <-
function (object, test, fuzzy = FALSE, ...)
{
if (is.vector (test))
test = matrix (test, ncol = 1)
res = NULL
if (fuzzy)
res = stats::predict (object$model, as.data.frame (test))$posterior
else
res = stats::predict (object$model, as.data.frame (test))$class
return (res)
}
#' Model predictions
#'
#' This function predicts values based upon a model trained by \code{\link{KNN}}.
#' @name predict.knn
#' @param object The classification model (of class \code{\link[class]{knn}}).
#' @param test The test set (a \code{data.frame}).
#' @param fuzzy A boolean indicating whether fuzzy classification is used or not.
#' @param ... Other parameters.
#' @return A vector of predicted values (\code{factor}).
#' @export
#' @method predict knn
#' @seealso \code{\link{KNN}}, \code{\link{knn-class}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model = KNN (d$train.x, d$train.y)
#' predict (model, d$test.x)
predict.knn <-
function (object, test, fuzzy = FALSE, ...)
{
if (is.vector (test))
test = matrix (test, ncol = 1)
res = NULL
if (fuzzy)
res = attr (caret::knn3Train (object$train, test, object$labels, object$k, prob = TRUE), "prob")
else
res = class::knn (object$train, test, object$labels, object$k)
return (res)
}
#' Model predictions
#'
#' This function predicts values based upon a model trained by any classification or regression model.
#' @name predict.model
#' @param object The classification model (of class \code{\link{cda-class}}, created by \code{\link{CDA}}).
#' @param test The test set (a \code{data.frame}).
#' @param fuzzy A boolean indicating whether fuzzy classification is used or not.
#' @param ... Other parameters.
#' @return A vector of predicted values (\code{factor}).
#' @export
#' @method predict model
#' @seealso \code{\link{model-class}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = splitdata (iris, 5)
#' model = LDA (d$train.x, d$train.y)
#' predict (model, d$test.x)
predict.model <-
function (object, test, fuzzy = FALSE, ...)
{
res = NULL
if (object$method == "CART")
{
if (is.vector (test))
{
test = matrix (test, ncol = 1)
colnames (test) = "X"
}
if (fuzzy)
res = stats::predict (object$model, as.data.frame (test))
else
{
if (object$type == "class")
res = stats::predict (object$model, as.data.frame (test), type = "class")
else
res = stats::predict (object$model, as.data.frame (test))
}
}
else if ((object$method == "LDA") | (object$method == "QDA"))
{
if (is.vector (test))
test = matrix (test, ncol = 1)
if (fuzzy)
{
res = stats::predict (object$model, test)$posterior
k = length (object$model$lev)
if (ncol (res) < k)
{
tmp = matrix (rep (0, k * nrow (test)), ncol = k)
colnames (tmp) = object$model$lev
tmp [, colnames (res)] = res
res = tmp
}
}
else
res = stats::predict (object$model, test)$class
}
else if (object$method == "lm")
{
if (is.vector (test))
test = data.frame (X = test)
res = stats::predict (object$model, test)
}
else if (object$method == "LR")
{
if (is.vector (test))
test = matrix (test, ncol = 1)
if (ncol (test) == 1)
colnames (test) = "X"
if (fuzzy)
{
res = stats::predict (object$model, test, type = "probs")
labels = object$model$lev
if (length (labels) == 2)
{
res = cbind (res, 1 - res)
colnames (res) = labels
}
}
else
res = stats::predict (object$model, test, type = "class")
}
else if (object$method == "MLP")
{
if (is.vector (test))
test = data.frame (X = test)
if (fuzzy)
{
res = stats::predict (object$model, test, type = "raw")
labels = object$model$lev
if (length (labels) == 2)
{
res = cbind (res, 1 - res)
colnames (res) = labels
}
}
else
{
pred = stats::predict (object$model, test, type = "class")
labels = object$model$lev
pred = c (labels, pred)
res = factor (pred, levels = labels) [-(1:(length (labels)))]
}
}
else if (object$method == "MLPREG")
{
d.norm = sweep (sweep (as.data.frame (test), 2, object$model$minimum [-1], FUN = "-"), 2, object$model$range [-1], FUN = "/")
colnames (d.norm) = attr (attr (object$model$model$terms, "factor"), "dimnames") [[1]] [-1]
res = (stats::predict (object$model$model, d.norm) * object$model$range [1]) + object$model$minimum [1]
}
else if (object$method == "MRV")
{
res = stats::predict (object$model, test, ncomp = object$model$ncomp)
}
else if (object$method == "NB")
{
type = "class"
if (fuzzy)
type = "raw"
if (is.vector (test))
test = matrix (test, ncol = 1)
res = stats::predict (object$model, test, type = type)
}
else if (object$method == "regularisation")
{
res = stats::predict (object$model, as.matrix (test))
}
else if (object$method == "RF")
{
if (is.vector (test))
test = matrix (test, ncol = 1)
res = stats::predict (object$model, test)
}
else if (object$method == "SVM")
{
if (fuzzy)
res = attr (stats::predict (object$model, test, probability = TRUE), "probabilities")
else
res = stats::predict (object$model, test, probability = FALSE)
}
else if (object$method == "XGB")
{
res = predict (object$model, as.matrix (test), reshape = T)
if (fuzzy)
colnames (res) = object$lev
else
{
levels = 1:length (object$lev)
res = factor (c (levels, apply (res, 1, which.max)), labels = object$lev) [-levels]
}
}
else
{
res = stats::predict (object$model, test, ...)
}
return (res)
}
#' @keywords internal
protocol.bootstrap <-
function (methods, train.x, train.y, methodparameters, nruns, seed, ...)
{
set.seed (seed)
predictions = NULL
targets = NULL
n = length (train.y)
indices = 1:length (methods)
samples = matrix (sample (n, n * nruns, replace = TRUE), ncol = nruns)
for (i in 1:nruns)
{
s = samples [, i]
targets = c (targets, list (train.y [-s]))
learn = train.x [s, ]
if (is.vector (learn))
learn = data.frame (X = learn)
test = train.x [-s, ]
if (is.vector (test))
test = data.frame (X = test)
rownames (learn) = 1:nrow (learn)
if (length (methods) == 1)
{
models = methods (learn, train.y [s], graph = FALSE, methodparameters = methodparameters, ...)
predictions = c (predictions, list (stats::predict (models, test, ...)))
}
else
{
models = lapply (indices, function (i) methods [[i]] (learn, train.y [s], graph = FALSE, methodparameters = methodparameters [[i]], ...))
predictions = c (predictions, list (sapply (models, function (model) as.numeric (stats::predict (model, test, ...)))))
}
}
return (list (predictions = predictions, targets = targets))
}
#' @keywords internal
protocol.crossvalidation <-
function (methods, train.x, train.y, methodparameters, nruns, nfolds, seed, ...)
{
set.seed (seed)
predictions = NULL
targets = NULL
n = length (train.y)
indices = 1:length (methods)
for (i in 1:nruns)
{
if (!is.null (seed))
set.seed (seed + i)
s = sample (n, n, replace = FALSE)
folds = rep (1:nfolds, diff (round ((n / nfolds) * 0:(nfolds))))
for (j in 1:nfolds)
{
slearn = s [folds != j]
stest = s [folds == j]
targets = c (targets, list (train.y [stest]))
learn = train.x [slearn, ]
if (is.vector (learn))
learn = data.frame (X = learn)
test = train.x [stest, ]
if (is.vector (test))
test = data.frame (X = test)
rownames (learn) = 1:nrow (learn)
if (length (methods) == 1)
{
models = methods (learn, train.y [slearn], graph = FALSE, methodparameters = methodparameters, ...)
predictions = c (predictions, list (stats::predict (models, test, ...)))
}
else
{
models = lapply (indices, function (i) methods [[i]] (learn, train.y [slearn], graph = FALSE, methodparameters = methodparameters [[i]], ...))
predictions = c (predictions, list (sapply (models, function (model) as.numeric (stats::predict (model, test, ...)))))
}
}
}
return (list (predictions = predictions, targets = targets))
}
#' @keywords internal
protocol.holdout <-
function (methods, train.x, train.y, test.x = NULL, test.y = NULL, train.size = round (0.7 * length (train.y)), methodparameters, seed, ...)
{
set.seed (seed)
if (is.null (test.x) | is.null (test.y))
{
set.seed (seed)
if (train.size < 1)
train.size = round (train.size * length (train.y))
s = sample (nrow (train.x), train.size)
test.x = train.x [-s, ]
train.x = train.x [s, ]
if (is.vector (train.x))
train.x = data.frame (X = train.x)
if (is.vector (train.x))
test.x = data.frame (X = test.x)
test.y = train.y [-s]
train.y = train.y [s]
}
set.seed (seed)
predictions = NULL
targets = test.y
indices = 1:length (methods)
if (length (methods) == 1)
predictions = predict (methods (train.x, train.y, graph = FALSE, methodparameters = methodparameters, ...), test.x)
else
{
models = lapply (indices, function (i) methods [[i]] (train.x, train.y, graph = FALSE, methodparameters = methodparameters [[i]], ...))
predictions = sapply (models, function (model) as.numeric (stats::predict (model, test.x, ...)))
}
return (list (predictions = list (predictions), targets = list (targets)))
}
#' @keywords internal
protocol.loocv <-
function (methods, train.x, train.y, methodparameters, seed, ...)
{
set.seed (seed)
predictions = NULL
targets = train.y
indices = 1:length (methods)
if (length (methods) == 1)
predictions = sapply (1:length (train.y), function (j) predict (methods (train.x [-j, ], train.y [-j], graph = FALSE, methodparameters = methodparameters, ...), train.x [j, ]))
else
{
predictions = t (sapply (1:length (train.y), function (j) {
models = lapply (indices, function (i) methods [[i]] (train.x [-j, ], train.y [-j], graph = FALSE, methodparameters = methodparameters [[i]], ...))
predictions = sapply (models, function (model) as.numeric (stats::predict (model, train.x [j, ], ...)))
}))
}
return (list (predictions = list (predictions), targets = list (targets)))
}
#' @keywords internal
protocol.train <-
function (methods, train.x, train.y, methodparameters, seed, ...)
{
set.seed (seed)
predictions = NULL
targets = train.y
indices = 1:length (methods)
if (length (methods) == 1)
predictions = predict (methods (train.x, train.y, graph = FALSE, methodparameters = methodparameters, ...), train.x)
else
{
models = lapply (indices, function (i) methods [[i]] (train.x, train.y, graph = FALSE, methodparameters = methodparameters [[i]], ...))
predictions = sapply (models, function (model) as.numeric (stats::predict (model, train.x, ...)))
}
return (list (predictions = list (predictions), targets = list (targets)))
}
#' Classification using Quadratic Discriminant Analysis
#'
#' This function builds a classification model using Quadratic Discriminant Analysis.
#' @name QDA
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[MASS]{qda}}
#' @examples
#' require (datasets)
#' data (iris)
#' QDA (iris [, -5], iris [, 5])
QDA <-
function (train, labels, tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
train = matrix (train, ncol = 1)
model = MASS::qda (x = train, grouping = labels)
res = list (model = model, method = "QDA")
class (res) = "model"
}
return (res)
}
#' Classification using Random Forest
#'
#' This function builds a classification model using Random Forest
#' @name RANDOMFOREST
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param ntree The number of trees in the forest.
#' @param nvar Number of variables randomly sampled as candidates at each split.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link[randomForest]{randomForest}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' RANDOMFOREST (iris [, -5], iris [, 5])
#' }
RANDOMFOREST <-
function (train, labels,
ntree = 500,
nvar = if (!is.null (labels) && !is.factor (labels)) max (floor (ncol (train)/3), 1) else floor (sqrt (ncol (train))),
tune = FALSE, ...)
{
res = NULL
if (tune)
res = emptyparams ()
else
{
if (is.vector (train))
train = matrix (train, ncol = 1)
res = randomForest::randomForest(x = train, y = labels, ntree = ntree, ntry = nvar, ...)
res = list (model = res, method = "RF")
class (res) = "model"
}
return (res)
}
#' Plot ROC Curves
#'
#' This function plots ROC Curves of several classification predictions.
#' @name roc.curves
#' @param predictions The predictions of a classification model (\code{factor} or \code{vector}).
#' @param gt Actual labels of the dataset (\code{factor} or \code{vector}).
#' @param methods.names The name of the compared methods (\code{vector}).
#' @return The evaluation of the predictions (numeric value).
#' @export
#' @seealso \code{\link{cost.curves}}, \code{\link{performance}}
#' @examples
#' require (datasets)
#' data (iris)
#' d = iris
#' levels (d [, 5]) = c ("+", "+", "-") # Building a two classes dataset
#' model.nb = NB (d [, -5], d [, 5])
#' model.lda = LDA (d [, -5], d [, 5])
#' pred.nb = predict (model.nb, d [, -5])
#' pred.lda = predict (model.lda, d [, -5])
#' roc.curves (cbind (pred.nb, pred.lda), d [, 5], c ("NB", "LDA"))
roc.curves <-
function (predictions, gt, methods.names = NULL)
{
if (is.factor (predictions))
{
pred = ROCR::prediction (as.numeric (predictions), as.numeric (gt))
perf = ROCR::performance (pred, "tpr", "fpr")
ROCR::plot (perf, asp = 1)
}
else
{
pred = ROCR::prediction (as.numeric (predictions [, 1]), as.numeric (gt))
perf = ROCR::performance (pred, "tpr", "fpr")
ROCR::plot (perf, asp = 1)
for (i in 2:ncol (predictions))
{
pred = ROCR::prediction (as.numeric (predictions [, i]), as.numeric (gt))
perf = ROCR::performance (pred, "tpr", "fpr")
ROCR::plot (perf, add = TRUE, lty = i, col = i)
}
if (!is.null (methods.names))
graphics::legend ("bottomright", methods.names, lty = 1:ncol (predictions), col = 1:ncol (predictions), bty = "n")
}
}
#' Classification using one-level decision tree
#'
#' This function builds a classification model using CART with maxdepth = 1.
#' @name STUMP
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param randomvar If true, the model uses a random variable.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other parameters.
#' @return The classification model.
#' @export
#' @seealso \code{\link{CART}}
#' @examples
#' require (datasets)
#' data (iris)
#' STUMP (iris [, -5], iris [, 5])
STUMP <-
function (train, labels, randomvar = TRUE, tune = FALSE, ...)
{
new = train
if (randomvar && (!is.vector (train)))
{
var = sample (ncol (train), 1)
new = matrix (train [, var], ncol = 1)
colnames (new) = colnames (train) [var]
}
return (CART (new, labels, minsplit = 1, maxdepth = 1, cp = 0, tune = tune, ...))
}
#' Classification using Support Vector Machine
#'
#' This function builds a classification model using Support Vector Machine.
#' @name SVM
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param gamma The gamma parameter (if a vector, cross-over validation is used to chose the best size).
#' @param cost The cost parameter (if a vector, cross-over validation is used to chose the best size).
#' @param kernel The kernel type.
#' @param methodparameters Object containing the parameters. If given, it replaces \code{gamma} and \code{cost}.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other arguments.
#' @return The classification model.
#' @export
#' @seealso \code{\link[e1071]{svm}}, \code{\link{SVMl}}, \code{\link{SVMr}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' SVM (iris [, -5], iris [, 5], kernel = "linear", cost = 1)
#' SVM (iris [, -5], iris [, 5], kernel = "radial", gamma = 1, cost = 1)
#' }
SVM <-
function (train,
labels,
gamma = 2^(-3:3),
cost = 2^(-3:3),
kernel = c ("radial", "linear"),
methodparameters = NULL,
tune = FALSE,
...)
{
model = NULL
if (!is.null (methodparameters))
{
gamma = methodparameters$gamma
cost = methodparameters$cost
}
if (kernel [1] == "linear")
gamma = 0
if (length (gamma) > 1 | length (cost) > 1)
{
tunecontrol = e1071::tune.control(sampling = "bootstrap", nboot = 20, boot.size = 1)
model = e1071::tune.svm (train, labels,
gamma = gamma, cost = cost, kernel = kernel [1],
tunecontrol = tunecontrol, probability = TRUE, ...)$best.model
}
else
model = e1071::svm (train, labels, gamma = gamma, cost = cost, kernel = kernel [1], probability = TRUE, ...)
res = NULL
if (tune)
{
res = list (gamma = model$gamma, cost = model$cost)
class (res) = "params"
}
else
{
res = list (model = model, method = "SVM")
class (res) = "model"
}
return (res)
}
#' Classification using Support Vector Machine with a linear kernel
#'
#' This function builds a classification model using Support Vector Machine with a linear kernel.
#' @name SVMl
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param cost The cost parameter (if a vector, cross-over validation is used to chose the best size).
#' @param methodparameters Object containing the parameters. If given, it replaces \code{gamma} and \code{cost}.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other arguments.
#' @return The classification model.
#' @export
#' @seealso \code{\link[e1071]{svm}}, \code{\link{SVM}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' SVMl (iris [, -5], iris [, 5], cost = 1)
#' }
SVMl <-
function (train,
labels,
cost = 2^(-3:3),
methodparameters = NULL,
tune = FALSE,
...)
{
return (SVM (
train = train,
labels = labels,
gamma = NULL,
cost = cost,
kernel = "linear",
methodparameters = methodparameters,
tune = tune,
...
))
}
#' Classification using Support Vector Machine with a radial kernel
#'
#' This function builds a classification model using Support Vector Machine with a radial kernel.
#' @name SVMr
#' @param train The training set (description), as a \code{data.frame}.
#' @param labels Class labels of the training set (\code{vector} or \code{factor}).
#' @param gamma The gamma parameter (if a vector, cross-over validation is used to chose the best size).
#' @param cost The cost parameter (if a vector, cross-over validation is used to chose the best size).
#' @param methodparameters Object containing the parameters. If given, it replaces \code{gamma} and \code{cost}.
#' @param tune If true, the function returns paramters instead of a classification model.
#' @param ... Other arguments.
#' @return The classification model.
#' @export
#' @seealso \code{\link[e1071]{svm}}, \code{\link{SVM}}
#' @examples
#' \dontrun{
#' require (datasets)
#' data (iris)
#' SVMr (iris [, -5], iris [, 5], gamma = 1, cost = 1)
#' }
SVMr <-
function (train,
labels,
gamma = 2^(-3:3),
cost = 2^(-3:3),
methodparameters = NULL,
tune = FALSE,
...)
{
return (SVM (
train = train,
labels = labels,
gamma = gamma,
cost = cost,
kernel = "radial",
methodparameters = methodparameters,
tune = tune,
...
))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.