Nothing
#######################################################################
# arules - Mining Association Rules and Frequent Itemsets
# Copyright (C) 2011-2015 Michael Hahsler, Christian Buchta,
# Bettina Gruen and Kurt Hornik
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#' Association Rule Induction from Itemsets
#'
#' Provides the generic function `ruleInduction()` and the method to induce all association
#' [rules] which can be generated by the given set of [itemsets] from a [transactions]
#' dataset.
#'
#' All rules that can be created using the supplied itemsets and that surpass the
#' specified minimum confidence threshold are returned.
#' `ruleInduction()` can be used to produce
#' closed association rules defined by Pei
#' et al. (2000) as rules `X => Y` where both `X` and `Y` are
#' closed frequent itemsets. See the code example in the Example section.
#'
#' Rule induction implements several induction methods. The default method is `"ptree"`
#'
#' - `"ptree"` **method without transactions:**
#' No transactions are need to be specified if
#' `x` contains a complete set of frequent or
#' itemsets. The itemsets' support counts are stored in a ptree and then retrieved to
#' create rules and calculate rules confidence. This is very fast,
#' but fails because of missing
#' support values if `x` is not a complete set of frequent itemsets.
#'
#' - `"ptree"` **method with transactions:**
#' If transactions are specified then all transactions are counted into a prefix
#' tree and
#' later retrieved to create rules from the itemsets and calculate confidence values.
#' This is slower, but necessary if `x` is not a complete set of frequent itemsets.
#' To improve speed, unused items are removed from the transaction
#' data before creating the prefix tree (this behavior can be changed using the
#' argument `reduce`). This might be slower for large transaction
#' data sets. However, this is highly recommended as the items are also
#' reordered to reduce the counting time.
#'
#'- `"apriori"` **method (always needs transactions):**
#' All association rules are mined from the transactions data set using [apriori()]
#' with the
#' smallest support found in the itemsets. In a second step, all rules which cannot
#' be generated from one of the itemsets are removed. This procedure is very slow,
#' especially for itemsets with many elements or very low support.
#'
#' @family mining algorithms
#' @family postporocessing
#'
#' @param x the set of [itemsets] from which rules will be induced.
#' @param transactions the [transactions] used to mine the itemsets. Can
#' be omitted for method `"ptree"`,
#' if `x` contains a (complete set) of frequent itemsets
#' together with their support counts.
#' @param confidence a numeric value between 0 and 1 giving the minimum confidence
#' threshold for the rules.
#' @param method `"ptree"` or `"apriori"`
#' @param reduce remove unused items to speed up the counting process?
#' @param verbose report progress?
#' @param ... further arguments.
#' @return An object of class [rules].
#' @author Christian Buchta and Michael Hahsler
#' @references Michael Hahsler, Christian Buchta, and Kurt Hornik. Selective
#' association rule generation. _Computational Statistics,_ 23(2):303-315,
#' April 2008.
#'
#' Jian Pei, Jiawei Han, Runying Mao. CLOSET: An Efficient Algorithm for Mining
#' Frequent Closed Itemsets. _ACM SIGMOD Workshop on Research Issues in Data
#' Mining and Knowledge Discovery (DMKD 2000)._
#' @keywords models
#' @examples
#' data("Adult")
#'
#' ## find all closed frequent itemsets
#' closed_is <- apriori(Adult, target = "closed frequent itemsets", support = 0.4)
#' closed_is
#'
#' ## use rule induction to produce all closed association rules
#' closed_rules <- ruleInduction(closed_is, transactions = Adult, verbose = TRUE)
#'
#' ## inspect the resulting closed rules
#' summary(closed_rules)
#' inspect(head(closed_rules, by = "lift"))
#'
#' ## get rules from frequent itemsets. Here, transactions does not need to be
#' ## specified for rule induction.
#' frequent_is <- eclat(Adult, support = 0.4)
#' assoc_rules <- ruleInduction(frequent_is)
#' assoc_rules
#' inspect(head(assoc_rules))
#'
#' ## for itemsets that are not a complete set of frequent itemsets,
#' ## transactions need to be specified.
#' some_is <- sample(frequent_is, 10)
#' some_rules <- ruleInduction(some_is, transactions = Adult)
#' some_rules
setGeneric("ruleInduction",
function(x, ...)
standardGeneric("ruleInduction"))
#' @rdname ruleInduction
setMethod("ruleInduction", signature(x = "itemsets"),
function(x,
transactions = NULL,
confidence = 0.8,
method = c("ptree", "apriori"),
reduce = FALSE,
verbose = FALSE,
...) {
method <- match.arg(method)
## check transaction data
if (!is.null(transactions)) {
nItems <- nitems(transactions)
if (nItems != nitems(items(x)))
stop("Dimensions of x and transactions do not match!")
if (any(itemLabels(x) != itemLabels(transactions)))
stop("Item labels for x and transactions do not match!")
}
if (verbose)
cat("ruleInduction: using method", method, "\n")
## find rules
pt1 <- proc.time()
rules <-
if (method == "ptree" && !is.null(transactions))
ruleInduction.ptree(x, transactions,
confidence, reduce, verbose)
else if (method == "ptree" && is.null(transactions))
ruleInduction.index(x, confidence, verbose)
#else if(method == "tidlists") ruleInduction.tidlists(x,
# transactions, confidence, verbose)
else
ruleInduction.apriori(x, transactions,
confidence, reduce, verbose)
pt2 <- proc.time()
if (verbose)
cat("searching done [", pt2[1] - pt1[1], "s].\n", sep = "")
info <- x@info
if (is.null(info$data))
info <- c(x = match.call()$x, info)
## apriori
if (is.null(info$confidence))
info <- c(info, confidence = confidence)
else
info$confidence <- confidence
rules@info <- info
pt3 <- proc.time()
if (verbose)
cat("postprocessing done [", pt3[1] - pt2[1], "s].\n",
sep = "")
## return found rules
rules
})
ruleInduction.apriori <-
function(x,
transactions,
confidence = 0.8,
reduce = FALSE,
verbose = FALSE) {
if (is.null(transactions))
stop("rule induction method apriori needs transactions!")
nItems <- nitems(transactions)
itemInfo <- itemInfo(transactions)
if (reduce) {
ifreq <- itemFrequency(items(x), type = "abs")
items.involved <- which(ifreq > 0)
if (verbose)
cat(
"reducing data from",
nitems(items(x)),
"items to",
length(items.involved) ,
"items\n"
)
x@items <- x@items[, items.involved]
transactions <- transactions[, items.involved]
}
empty_rules <- function(trans) {
em <- as(trans, "itemMatrix")[0]
new(
"rules",
lhs = em,
rhs = em,
quality = data.frame(
support = numeric(0),
confidence = numeric(0),
lift = numeric(0)
)
)
}
if (length(transactions) < 1)
return(empty_rules(transactions))
## itemset sizes
isetsSize <- size(x)
## find minimal support and mine all rules
## Note: minSupport is reduced by epsilon so we get the rules
## with support == min support in x
minSupport <-
min(quality(x)$support) - 1 / length(transactions)
### suppress maxlen warnings
suppressWarnings(rules <-
apriori(
transactions,
parameter = list(
support = minSupport,
confidence = confidence,
target = "rules",
minlen = min(isetsSize),
maxlen = max(isetsSize)
),
control = list(verbose = verbose)
))
## find rules which were generated by the itemsets
if (verbose)
cat(paste("starting to filter", length(rules), "rules.\n"))
take <- !is.na(match(items(rules), items(x)))
if (verbose)
cat("filtering done.\n")
rules <- rules[take]
if (verbose)
cat("left with", length(rules), "rules.\n")
if (reduce) {
## expand items back to full space
## -1 since indices in ngCMatix start with 0
items.index <- items.involved - 1L
## fix dim
rules@lhs@data@Dim[1] <- nItems
rules@rhs@data@Dim[1] <- nItems
## fix column indices
## +1 since indices in ngCMatix start with 0
rules@lhs@data@i <- items.index[(rules@lhs@data@i + 1L)]
rules@rhs@data@i <- items.index[(rules@rhs@data@i + 1L)]
## fix item labels
rules@lhs@itemInfo <- itemInfo
rules@rhs@itemInfo <- itemInfo
}
rules
}
# FIXME: Currently disabled
ruleInduction.tidlists <-
function(x,
transactions,
confidence = 0.8,
verbose = FALSE) {
tid <- as(transactions, "tidLists")
data <- .Call(R_tid_rules , tid@data, x@items@data)
names(data) <- c("support",
"confidence",
"lhs_i",
"lhs_p",
"rhs_i",
"rhs_p",
"Dim")
quality <-
data.frame(support = data$support,
confidence = data$confidence)
lhs <-
new(
"ngCMatrix",
i = data$lhs_i,
p = data$lhs_p,
Dim = data$Dim
)
rhs <-
new(
"ngCMatrix",
i = data$rhs_i,
p = data$rhs_p,
Dim = data$Dim
)
lhs <-
new("itemMatrix",
data = lhs,
itemInfo = x@items@itemInfo)
rhs <-
new("itemMatrix",
data = rhs,
itemInfo = x@items@itemInfo)
rules <- new("rules",
lhs = lhs,
rhs = rhs,
quality = quality)
rules <- rules[quality(rules)$confidence > confidence]
rules
}
## ptree support counting
ruleInduction.ptree <-
function(x,
transactions,
confidence = 0.8,
reduce = FALSE,
verbose = FALSE) {
r <-
.Call(R_pncount,
x@items@data,
transactions@data,
FALSE,
reduce,
verbose)
names(r) <-
c("data.lhs",
"data.rhs",
"support",
"confidence",
"lift",
"itemset")
## quality: set NAs to 0 since they are the result of items missing
## in transactions
q <- as.data.frame(r[3:6])
q[is.na(q)] <- 0
take <- q$confidence >= confidence
rules(
lhs = new(
"itemMatrix",
data = r$data.lhs,
itemInfo = transactions@itemInfo
)[take,],
rhs = new(
"itemMatrix",
data = r$data.rhs,
itemInfo = transactions@itemInfo
)[take,],
quality = q[take,]
)
}
## ptree indexing
ruleInduction.index <-
function(x,
confidence = 0.8,
verbose = FALSE) {
if (is.null(quality(x)$support))
stop("cannot induce rules because support is missing ! Specify transactions.")
r <- data.frame(.Call(R_pnrindex, x@items@data, verbose))
names(r) <- c("i", "li", "ri")
if (!all(r$li) || !all(r$ri))
stop("cannot induce rules because itemsets are incomplete ! Specify transactions.")
r$support <- x@quality$support[r$i]
r$confidence <- r$support /
x@quality$support[r$li]
# filter
r <- r[r$confidence >= confidence, ]
if (dim(r)[1] == 0)
return(new("rules"))
r$lift <- r$confidence / x@quality$support[r$ri]
rules(lhs = x@items[r$li],
rhs = x@items[r$ri],
quality = r[4:6])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.