# Nothing
#' C5.0 Decision Trees and Rule-Based Model
#'
#' Fit classification tree models or rule-based models using Quinlan's C5.0
#' algorithm.
#'
#' @param trials integer number of boosting iterations.
#' @param rules logical indicating whether to decompose the tree into a
#' rule-based model.
#' @param subset logical indicating whether the model should evaluate groups of
#' discrete predictors for splits.
#' @param bands integer between 2 and 1000 specifying a number of bands into
#' which to group rules ordered by their effect on the error rate.
#' @param winnow logical indicating use of predictor winnowing (i.e. feature
#' selection).
#' @param noGlobalPruning logical indicating whether to skip the final, global
#' pruning step that simplifies the tree.
#' @param CF number in (0, 1) for the confidence factor.
#' @param minCases integer for the smallest number of samples that must be put
#' in at least two of the splits.
#' @param fuzzyThreshold logical indicating whether to evaluate possible
#' advanced splits of the data.
#' @param sample number from 0 to 0.999 that specifies the random proportion
#' of data to use in training the model; the default of 0 uses all of the data.
#' @param earlyStopping logical indicating whether the internal method for
#' stopping boosting should be used.
#'
#' @details
#' \describe{
#' \item{Response types:}{\code{factor}}
#' \item{\link[=TunedModel]{Automatic tuning} of grid parameters:}{
#' \code{trials}, \code{rules}, \code{winnow}
#' }
#' }
#'
#' Arguments other than \code{trials} and \code{rules} are passed to
#' \code{\link[C50]{C5.0Control}}.
#' Further model details can be found in the source link below.
#'
#' In calls to \code{\link{varimp}} for \code{C50Model}, argument \code{type}
#' may be specified as \code{"usage"} (default) for the percentage of training
#' set samples that fall into all terminal nodes after the split of each
#' predictor or as \code{"splits"} for the percentage of splits associated with
#' each predictor. Variable importance is automatically scaled to range from 0
#' to 100. To obtain unscaled importance values, set \code{scale = FALSE}. See
#' example below.
#'
#' @return \code{MLModel} class object.
#'
#' @seealso \code{\link[C50]{C5.0}}, \code{\link{fit}}, \code{\link{resample}}
#'
#' @examples
#' \donttest{
#' ## Requires prior installation of suggested package C50 to run
#'
#' model_fit <- fit(Species ~ ., data = iris, model = C50Model)
#' varimp(model_fit, method = "model", type = "splits", scale = FALSE)
#' }
#'
C50Model <- function(
  trials = 1, rules = FALSE, subset = TRUE, bands = 0, winnow = FALSE,
  noGlobalPruning = FALSE, CF = 0.25, minCases = 2, fuzzyThreshold = FALSE,
  sample = 0, earlyStopping = TRUE
) {
  # NOTE: no local variables are created in this frame. new_params() is handed
  # the frame itself, so anything defined here would presumably be picked up
  # as a model parameter alongside the arguments above.
  MLModel(
    name = "C50Model",
    label = "C5.0 Classification",
    packages = "C50",
    response_types = "factor",
    weights = TRUE,
    predictor_encoding = "model.frame",
    na.rm = FALSE,
    params = new_params(environment()),

    # Tuning grid: one value generator per entry of `param`, listed in the
    # same order (trials, rules, winnow).
    gridinfo = new_gridinfo(
      param = c("trials", "rules", "winnow"),
      get_values = list(
        function(n, ...) round_int(c(1, seq_range(0, 10, c(2, 100), n))),
        function(...) c(FALSE, TRUE),
        function(...) c(FALSE, TRUE)
      )
    ),

    # Model fitting: trials and rules go to C5.0() directly; every remaining
    # argument arrives through ... and configures the control object.
    fit = function(formula, data, weights, trials, rules, ...) {
      control <- C50::C5.0Control(...)
      eval_fit(
        data,
        # Formula interface.
        formula = C50::C5.0(
          formula, data = data, weights = weights, trials = trials,
          rules = rules, control = control
        ),
        # Matrix interface. x and y are not defined in this function; they
        # are presumably supplied by eval_fit when it evaluates this
        # expression -- confirm against eval_fit.
        matrix = C50::C5.0(
          x, y, weights = weights, trials = trials, rules = rules,
          control = control
        )
      )
    },

    # Prediction: class probabilities computed on newdata coerced to a data
    # frame.
    predict = function(object, newdata, ...) {
      predict(object, newdata = as.data.frame(newdata), type = "prob")
    },

    # Variable importance: type selects the C5imp metric and defaults to
    # "usage", the first of the allowed choices.
    varimp = function(object, type = c("usage", "splits"), ...) {
      metric <- match.arg(type)
      C50::C5imp(object, metric = metric)
    }
  )
}
# Apply the `MLModelFunction<-` replacement function (defined elsewhere in the
# package) to C50Model. Presumably this tags the constructor as a model
# function -- TODO confirm against the definition of `MLModelFunction<-`.
MLModelFunction(C50Model) <- NULL
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.