#' Use FOIL to learn a rule set for classification
#'
#' Build a classifier rule base using FOIL (First Order Inductive Learner), a
#' greedy algorithm that learns rules to distinguish positive from negative
#' examples.
#'
#' Implements FOIL (Quinlan and Cameron-Jones, 1995) to learn rules and then
#' use them as a classifier following Xiaoxin and Han (2003).
#'
#' For each class, we find the positive and negative examples and learn the
#' rules using FOIL. Then the rules for all classes are combined and sorted by
#' Laplace accuracy on the training data.
#'
#' Following Xiaoxin and Han (2003), we classify new examples by \enumerate{
#' \item select all the rules whose bodies are satisfied by the example; \item
#' from the rules select the best k rules per class (highest expected Laplace
#' accuracy); \item average the expected Laplace accuracy per class and choose
#' the class with the highest average. }
#'
#' @aliases FOIL foil
#' @param formula A symbolic description of the model to be fitted. Has to be
#' of form \code{class ~ .} or \code{class ~ predictor1 + predictor2}.
#' @param data A data.frame or a transaction set containing the training data.
#' Data frames are automatically discretized and converted to transactions.
#' @param max_len maximal length of the LHS of the created rules.
#' @param min_gain minimal gain required to expand a rule.
#' @param best_k use the average expected accuracy (laplace) of the best k
#' rules per class for prediction.
#' @param disc.method Discretization method used to discretize continuous
#' variables if data is a data.frame (default: \code{"mdlp"}). See
#' \code{\link{discretizeDF.supervised}} for more supervised discretization
#' methods.
#' @return Returns an object of class \code{\link{CBA.object}} representing the
#' trained classifier.
#' @author Michael Hahsler
#' @seealso \code{\link{CBA.object}}.
#' @references Quinlan, J.R., Cameron-Jones, R.M. Induction of logic programs:
#' FOIL and related systems. NGCO 13, 287-312 (1995).
#' \doi{10.1007/BF03037228}
#'
#' Yin, Xiaoxin and Jiawei Han. CPAR: Classification based on Predictive
#' Association Rules, SDM, 2003.
#' \doi{10.1137/1.9781611972733.40}
#' @examples
#'
#' data("iris")
#'
#' # learn a classifier using automatic default discretization
#' classifier <- FOIL(Species ~ ., data = iris)
#' classifier
#'
#' # inspect the rule base
#' inspect(rules(classifier))
#'
#' # make predictions for the first few instances of iris
#' predict(classifier, head(iris))
#'
FOIL <- function(formula, data, max_len = 3, min_gain = .7, best_k = 5, disc.method = "mdlp"){
formula <- as.formula(formula)
trans <- prepareTransactions(formula, data, disc.method = disc.method)
parsedFormula <- .parseformula(formula, trans)
class_ids <- parsedFormula$class_ids
# Do FOIL for each class label and join the resulting rules. (see CPAR)
# convert transactions to pattern and class matrices
# (items are columns in a column oriented sparse format)
trans_mat <- t(as(trans, "ngCMatrix"))
dimnames(trans_mat) <- list(NULL, NULL)
m <- ncol(trans_mat) # number of items
n <- nrow(trans_mat) # number of transactions
rules <- list()
# positive and negative examples for class
for(cid in class_ids) {
# find transactions for the class
p <- trans_mat[, cid, drop = FALSE]
p <- p@i+1L ### this is a hack since Matrix does not support sparse subsetting
### and we are guaranteed to have only one column in p
pos <- trans_mat[p ,, drop = FALSE]
#neg <- trans_mat[!p,, drop = FALSE]
neg <- trans_mat[-p,, drop = FALSE]
patterns <- matrix(NA, nrow = 0, ncol = m)
while(nrow(pos) > 0) {
# new lhs pattern of rule cannot have class labels in it
pat <- logical(m)
pat[class_ids] <- NA
pos2 <- pos
neg2 <- neg
while(nrow(neg2) > 0 && sum(pat, na.rm = TRUE) < max_len) {
# calculate gain for adding an item p to r
# gain(p) = |P*| (log(|P*|/(|P*|+|N*|))-log(|P|/(|P|+|N|)))
to_check <- which(!pat)
# calculate gain for all possible added items
n_pos <- nrow(pos2)
n_neg <- nrow(neg2)
n_pos_covered <- colSums(pos2[,to_check, drop = FALSE])
n_neg_covered <- colSums(neg2[,to_check, drop = FALSE])
# could use log2!
gain <- n_pos_covered * (log(n_pos_covered/(n_pos_covered + n_neg_covered)) - log(n_pos/(n_pos + n_neg)))
if(all(gain < min_gain, na.rm = TRUE)) break
take_item <- to_check[which.max(gain)]
pat[take_item] <- TRUE
# remove examples not covered by the rule so far
pos2 <- pos2[pos2[,take_item],, drop = FALSE]
neg2 <- neg2[neg2[,take_item],, drop = FALSE]
}
# no more patterns to find
if(sum(pat, na.rm = TRUE) < 1) break
# add rule
### FIXME: make rules sparse
patterns <- rbind(patterns, pat)
# remove positive examples covered by the rule
pat[is.na(pat)] <- FALSE
pos <- pos[!rowSums(pos[,pat, drop = FALSE])==sum(pat),, drop = FALSE]
}
# convert rules to rule object
patterns[is.na(patterns)] <- FALSE
lhs <- new("itemMatrix", data = t(as(patterns, "ngCMatrix")), itemInfo = itemInfo(trans))
patterns[] <- FALSE
patterns[, cid] <- TRUE
rhs <- new("itemMatrix", data = t(as(patterns, "ngCMatrix")), itemInfo = itemInfo(trans))
classrules <- new("rules", lhs = lhs, rhs = rhs)
rules[[length(rules)+1L]] <- classrules
}
rules <- do.call(c, rules)
quality(rules) <- interestMeasure(rules, trans, measure = c("support","confidence","lift", "laplace"),
k = length(class_ids))
rules <- sort(rules, by = "laplace")
# assemble classifier
CBA_ruleset(
formula = formula,
rules = rules,
default = majorityClass(formula, trans), ### FIXME
method = "weighted",
weights = "laplace",
best_k = best_k,
discretization = attr(trans, "disc_info"),
description = paste0("FOIL-based classifier (Yin and Han, 2003)")
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.