# NOTE(review): stray non-R text from web scraping; commented out so the file parses.
# Nothing
#' Use FOIL to learn a rule set for classification
#'
#' Build a classifier rule base using FOIL (First Order Inductive Learner), a
#' greedy algorithm that learns rules to distinguish positive from negative
#' examples.
#'
#' Implements FOIL (Quinlan and Cameron-Jones, 1995) to learn rules and then
#' uses them as a classifier following Yin and Han (2003).
#'
#' For each class, we find the positive and negative examples and learn the
#' rules using FOIL. Then the rules for all classes are combined and sorted by
#' Laplace accuracy on the training data.
#'
#' Following Yin and Han (2003), we classify new examples by
#' \enumerate{
#' \item select all the rules whose bodies are satisfied by the example;
#' \item
#' from the rules select the best k rules per class (highest expected Laplace
#' accuracy);
#' \item average the expected Laplace accuracy per class and choose
#' the class with the highest average.
#' }
#'
#' @aliases FOIL foil
#'
#' @param formula A symbolic description of the model to be fitted. Has to be
#' of form `class ~ .` or `class ~ predictor1 + predictor2`.
#' @param data A data.frame or [arules::transactions] containing the training data.
#' Data frames are automatically discretized and converted to transactions with
#' [prepareTransactions()].
#' @param max_len maximal length of the LHS of the created rules.
#' @param min_gain minimal gain required to expand a rule.
#' @param best_k use the average expected accuracy (laplace) of the best k
#' rules per class for prediction.
#' @param disc.method Discretization method used to discretize continuous
#' variables if data is a data.frame (default: `"mdlp"`). See
#' [discretizeDF.supervised()] for more supervised discretization methods.
#' @return Returns an object of class [CBA] representing the
#' trained classifier.
#' @author Michael Hahsler
#' @references Quinlan, J.R., Cameron-Jones, R.M. Induction of logic programs:
#' FOIL and related systems. NGCO 13, 287-312 (1995).
#' \doi{10.1007/BF03037228}
#'
#' Yin, Xiaoxin and Jiawei Han. CPAR: Classification based on Predictive
#' Association Rules, SDM, 2003.
#' \doi{10.1137/1.9781611972733.40}
#' @examples
#' data("iris")
#'
#' # learn a classifier using automatic default discretization
#' classifier <- FOIL(Species ~ ., data = iris)
#' classifier
#'
#' # inspect the rule base
#' inspect(classifier$rules)
#'
#' # make predictions for the first few instances of iris
#' predict(classifier, head(iris))
#' @export
FOIL <-
function(formula,
data,
max_len = 3,
min_gain = .7,
best_k = 5,
disc.method = "mdlp") {
# Convert the input to transactions (data.frames are discretized first) and
# parse the formula to find which item columns encode the class labels.
formula <- as.formula(formula)
trans <-
prepareTransactions(formula, data, disc.method = disc.method)
parsedFormula <- .parseformula(formula, trans)
class_ids <- parsedFormula$class_ids
# Do FOIL for each class label and join the resulting rules. (see CPAR)
# convert transactions to pattern and class matrices
# (items are columns in a column oriented sparse format)
# trans_mat is a sparse logical matrix: rows = transactions, cols = items.
trans_mat <- t(as(trans, "ngCMatrix"))
dimnames(trans_mat) <- list(NULL, NULL)
m <- ncol(trans_mat) # number of items
n <- nrow(trans_mat) # number of transactions
rules <- list()
# positive and negative examples for class
for (cid in class_ids) {
# find transactions for the class: p becomes the row indices of
# transactions having the class item cid
p <- trans_mat[, cid, drop = FALSE]
p <-
p@i + 1L ### this is a hack since Matrix does not support sparse subsetting
### and we are guaranteed to have only one column in p
### (@i holds 0-based row indices of the nonzero entries)
pos <- trans_mat[p , , drop = FALSE]
#neg <- trans_mat[!p,, drop = FALSE]
neg <- trans_mat[-p, , drop = FALSE]
# learned rule bodies for this class, one row per rule (dense for now)
patterns <- matrix(NA, nrow = 0, ncol = m)
# Outer FOIL loop: learn one rule per iteration, removing the positive
# examples it covers, until no positives remain (or no rule can be grown).
while (nrow(pos) > 0) {
# new lhs pattern of rule cannot have class labels in it
# (NA marks class items as ineligible; which(!pat) skips NAs below)
pat <- logical(m)
pat[class_ids] <- NA
pos2 <- pos
neg2 <- neg
# Inner loop: greedily add items while negative examples remain
# covered and the rule body is shorter than max_len.
while (nrow(neg2) > 0 && sum(pat, na.rm = TRUE) < max_len) {
# calculate gain for adding an item p to r
# gain(p) = |P*| (log(|P*|/(|P*|+|N*|))-log(|P|/(|P|+|N|)))
to_check <- which(!pat)
# calculate gain for all possible added items
n_pos <- nrow(pos2)
n_neg <- nrow(neg2)
n_pos_covered <- colSums(pos2[, to_check, drop = FALSE])
n_neg_covered <- colSums(neg2[, to_check, drop = FALSE])
# could use log2!
# Items covering no positives yield NaN gain (0 * log(0/0));
# which.max and all(..., na.rm = TRUE) below ignore those.
gain <-
n_pos_covered * (log(n_pos_covered / (n_pos_covered + n_neg_covered)) - log(n_pos /
(n_pos + n_neg)))
if (all(gain < min_gain, na.rm = TRUE))
break
take_item <- to_check[which.max(gain)]
pat[take_item] <- TRUE
# remove examples not covered by the rule so far
pos2 <- pos2[pos2[, take_item], , drop = FALSE]
neg2 <- neg2[neg2[, take_item], , drop = FALSE]
}
# no more patterns to find (empty body means no item passed min_gain)
if (sum(pat, na.rm = TRUE) < 1)
break
# add rule
### FIXME: make rules sparse
patterns <- rbind(patterns, pat)
# remove positive examples covered by the rule
# (a row is covered iff it contains every item of the rule body;
# parses as !(rowSums(...) == sum(pat)) since ! binds looser than ==)
pat[is.na(pat)] <- FALSE
pos <-
pos[!rowSums(pos[, pat, drop = FALSE]) == sum(pat), , drop = FALSE]
}
# convert rules to rule object
patterns[is.na(patterns)] <- FALSE
# Coercion chain dense logical -> sparse ngCMatrix (transposed: items as
# rows) as required by the arules itemMatrix representation.
lhs <-
new("itemMatrix",
data = t(as(as(as(patterns, "nMatrix"), "generalMatrix"), "CsparseMatrix")),
itemInfo = itemInfo(trans))
# Reuse the matrix shape for the RHS: every rule predicts exactly
# the current class item cid.
patterns[] <- FALSE
patterns[, cid] <- TRUE
rhs <-
new("itemMatrix",
data = t(as(as(as(patterns, "nMatrix"), "generalMatrix"), "CsparseMatrix")),
itemInfo = itemInfo(trans))
classrules <- new("rules", lhs = lhs, rhs = rhs)
rules[[length(rules) + 1L]] <- classrules
}
# Combine the per-class rule sets and score them on the training data;
# k is passed through to the laplace measure (number of classes).
rules <- do.call(c, rules)
quality(rules) <-
interestMeasure(
rules,
trans,
measure = c("support", "confidence", "lift", "laplace"),
k = length(class_ids)
)
rules <- sort(rules, by = "laplace")
# assemble classifier
CBA_ruleset(
formula = formula,
rules = rules,
default = majorityClass(formula, trans),
### FIXME
method = "weighted",
weights = "laplace",
best_k = best_k,
discretization = attr(trans, "disc_info"),
description = paste0("FOIL-based classifier (Yin and Han, 2003)")
)
}
# NOTE(review): the following lines are website-scraping artifacts, not R code;
# commented out so the file remains parseable.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.