Nothing
#######################################################################
# arules - Mining Association Rules and Frequent Itemsets
# Copyright (C) 2011-2015 Michael Hahsler, Christian Buchta,
# Bettina Gruen and Kurt Hornik
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#' Class itemMatrix --- Sparse Binary Incidence Matrix to Represent Sets of
#' Items
#'
#' The `itemMatrix` class is the basic building block for [transactions],
#' and [associations]. The class contains a sparse
#' Matrix representation of a set of itemsets and the
#' corresponding item labels.
#'
#' **Representation**
#'
#' Sets of itemsets are represented as a compressed sparse
#' binary matrix. Conceptually, columns represent items and rows are the sets/transactions.
#' In the compressed form, each itemset is a vector of column indices (called item
#' IDs) representing the items.
#'
#' **Warning:** Ideally, we would store the matrix as a row-oriented sparse
#' matrix (`ngRMatrix`), but the \pkg{Matrix} package provides better support for
#' column-oriented sparse classes ([Matrix::ngCMatrix-class]). The matrix is therefore internally stored
#' in transposed form.
#'
#' **Working with several `itemMatrix` objects**
#'
#' If you work with several `itemMatrix` objects at the same time (e.g.,
#' several transaction sets, lhs and rhs of a rule, etc.), then the encoding
#' (itemLabes and order of the items in the binary matrix) in the different
#' itemMatrices is important and needs to conform. See [itemCoding]
#' to learn how to encode and recode `itemMatrix` objects.
#'
#' @aliases itemMatrix
#' @family itemMatrix and transactions functions
#'
#' @slot data a sparse matrix of class [Matrix::ngCMatrix-class] representing the itemsets.
#' **Warning:** the matrix is stored in transposed form for efficiency reasons!.
#' @slot itemInfo a data.frame
#' @slot itemsetInfo a data.frame
#'
#' @param object,x,from the object.
#' @param maxsum integer, how many items should be shown for the summary?
#' @param ... further parameters
#' @param cols columns for the long format.
#' @param decode decode item IDs to item labels.
#' @param value replacement value
#' @param itemSep item separator symbol.
#' @param setStart set start symbol.
#' @param setEnd set end symbol.
#'
#' @section Objects from the Class:
#' Objects can be created by calls of the form
#' `new("itemMatrix", ...)`. However, most of the time objects will be
#' created by coercion from a matrix, list or data.frame.
#' @author Michael Hahsler
#' @keywords classes
#' @examples
#' set.seed(1234)
#'
#' ## Generate a logical matrix with 5000 random itemsets for 20 items
#' m <- matrix(runif(5000 * 20) > 0.8, ncol = 20,
#' dimnames = list(NULL, paste("item", c(1:20), sep = "")))
#' head(m)
#'
#' ## Coerce the logical matrix into an itemMatrix object
#' imatrix <- as(m, "itemMatrix")
#' imatrix
#'
#' ## An itemMatrix contains a set of itemsets (each row is an itemset).
#' ## The length of the set is the number of rows.
#' length(imatrix)
#'
#' ## The sparese matrix also has regular matrix dimensions.
#' dim(imatrix)
#' nrow(imatrix)
#' ncol(imatrix)
#'
#' ## Subsetting: Get first 5 elements (rows) of the itemMatrix. This can be done in
#' ## several ways.
#' imatrix[1:5] ### get elements 1:5
#' imatrix[1:5, ] ### Matrix subsetting for rows 1:5
#' head(imatrix, n = 5) ### head()
#'
#' ## Get first 5 elements (rows) of the itemMatrix as list.
#' as(imatrix[1:5], "list")
#'
#' ## Get first 5 elements (rows) of the itemMatrix as matrix.
#' as(imatrix[1:5], "matrix")
#'
#' ## Get first 5 elements (rows) of the itemMatrix as sparse ngCMatrix.
#' ## **Warning:** For efficiency reasons, the ngCMatrix is transposed! You
#' ## can transpose it again to get the expected format.
#' as(imatrix[1:5], "ngCMatrix")
#' t(as(imatrix[1:5], "ngCMatrix"))
#'
#' ## Get labels for the first 5 itemsets (first default and then with
#' ## custom formating)
#' labels(imatrix[1:5])
#' labels(imatrix[1:5], itemSep = " + ", setStart = "", setEnd = "")
#'
#' ## Create itemsets manually from an itemMatrix. Itemsets contain items in the form of
#' ## an itemMatrix and additional quality measures (not supplied in the example).
#' is <- new("itemsets", items = imatrix)
#' is
#' inspect(head(is, n = 3))
#'
#'
#' ## Create rules manually. I use imatrix[4:6] for the lhs of the rules and
#' ## imatrix[1:3] for the rhs. Rhs and lhs cannot share items so I use
#' ## itemSetdiff here. I also assign missing values for the quality measures support
#' ## and confidence.
#' rules <- new("rules",
#' lhs = itemSetdiff(imatrix[4:6], imatrix[1:3]),
#' rhs = imatrix[1:3],
#' quality = data.frame(support = c(NA, NA, NA),
#' confidence = c(NA, NA, NA)
#' ))
#' rules
#' inspect(rules)
#'
#' ## Manually create a itemMatrix with an item encoding that matches imatrix (20 items in order
#' ## item1, item2, ..., item20)
#' itemset_list <- list(c("item1","item2"),
#' c("item3"))
#'
#' imatrix_new <- encode(itemset_list, itemLabels = imatrix)
#' imatrix_new
#' compatible(imatrix_new, imatrix)
#'
#' @aliases initialize,itemMatrix-method show,itemMatrix-method
setClass(
"itemMatrix",
representation(
data = "ngCMatrix",
itemInfo = "data.frame",
itemsetInfo = "data.frame"
),
prototype(itemInfo = data.frame(), itemsetInfo = data.frame()),
validity = function(object) {
## itemInfo needs a labels column of appropriate length
## also the labels must be unique for matching objects.
if (length(itemInfo(object)[["labels"]]) != nitems(object))
return("item labels in itemInfo do not match number of columns/items in the itemMatrix")
if (length(unique(itemInfo(object)[["labels"]])) != nitems(object))
return("item labels not unique")
if (ncol(itemsetInfo(object)) &&
nrow(itemsetInfo(object)) != length(object))
return("itemsetInfo does not match number of rows")
TRUE
}
)
setMethod(initialize, "itemMatrix", function(.Object, ...) {
.Object <- callNextMethod()
validObject(.Object)
.Object
})
setMethod("show", signature(object = "itemMatrix"),
function(object) {
cat(
"itemMatrix in sparse format with\n",
length(object),
"rows (elements/transactions) and\n",
nitems(object),
"columns (items)\n"
)
invisible(NULL)
})
setClass(
"summary.itemMatrix",
representation(
Dim = "integer",
density = "numeric",
itemSummary = "integer",
lengths = "table",
lengthSummary = "table",
itemInfo = "data.frame",
itemsetInfo = "data.frame"
)
)
# setGeneric("summary") # is generic in Matrix
#' @describeIn itemMatrix-class show a summary.
#' @aliases summary.itemMatrix-class show,summary.itemMatrix-method
setMethod("summary", signature(object = "itemMatrix"),
function(object, maxsum = 6, ...) {
ifs <-
sort(itemFrequency(object, type = "abs"), decreasing = TRUE)
isum <- head(ifs, maxsum - 1L)
isum <- c(isum, "(Other)" = sum(ifs) - sum(isum))
sizes <- size(object)
new(
"summary.itemMatrix",
Dim = dim(object),
density = .density_Matrix(as(object, "ngCMatrix")),
itemSummary = isum,
lengths = table(sizes),
lengthSummary = summary(sizes),
itemInfo = head(itemInfo(object), 3)
)
})
setMethod("show", signature(object = "summary.itemMatrix"),
function(object) {
cat(
"itemMatrix in sparse format with\n",
object@Dim[1],
"rows (elements/itemsets/transactions) and\n",
object@Dim[2],
"columns (items) and a density of",
object@density,
"\n"
)
cat("\nmost frequent items:\n")
print(object@itemSummary)
cat("\nelement (itemset/transaction) length distribution:\n")
print(object@lengths)
cat("\n")
print(object@lengthSummary)
if (length(names(object@itemInfo))) {
cat("\nincludes extended item information - examples:\n")
print(object@itemInfo)
}
invisible(NULL)
})
#' @describeIn itemMatrix-class returns the number of rows
#' (itemsets) and columns (items in the encoding).
setMethod("dim", signature(x = "itemMatrix"),
function(x)
rev(dim(x@data)))
#' @rdname itemMatrix-class
setGeneric("nitems",
function(x, ...)
standardGeneric("nitems"))
#' @describeIn itemMatrix-class returns the number of items in the encoding.
#' @aliases nitems
setMethod("nitems", signature(x = "itemMatrix"),
function(x)
ncol(x))
#' @describeIn itemMatrix-class returns the number of itemsets (rows) in the matrix.
setMethod("length", signature(x = "itemMatrix"),
function(x)
nrow(x))
#' @rdname itemMatrix-class
setGeneric("toLongFormat",
function(from, ...) standardGeneric("toLongFormat"))
#' @describeIn itemMatrix-class convert the sets to long format
#' (a data.frame with two columns, ID and item). Column names
#' can be specified as a character vector of length 2 called `cols`.
#' @aliases toLongFormat
setMethod("toLongFormat", signature(from = "itemMatrix"),
function(from,
cols = c("ID", "item"),
decode = TRUE) {
from_list <- LIST(from, decode = FALSE)
tids <-
unlist(lapply(
seq(length(from_list)),
FUN = function(i)
rep(i, times = length(from_list[[i]]))
))
items <- unlist(from_list)
if (decode)
items <- factor(items, labels = itemLabels(from))
df <- data.frame(tids, items)
colnames(df) <- cols
df
})
#' @describeIn itemMatrix-class returns labels for the itemsets.
#' The following arguments can be used to customize the representation
#' of the labels: `itemSep`, `setStart` and `setEnd`.
setMethod("labels", signature(object = "itemMatrix"),
function(object,
itemSep = ",",
setStart = "{",
setEnd = "}")
paste(setStart, sapply(as(object, "list"), paste,
collapse = itemSep), setEnd, sep = ""))
#' @rdname itemMatrix-class
setGeneric("itemLabels",
function(object, ...)
standardGeneric("itemLabels"))
#' @rdname itemMatrix-class
setGeneric("itemLabels<-",
function(object, value)
standardGeneric("itemLabels<-"))
#' @describeIn itemMatrix-class returns the item labels used for encoding as a character vector.
#' @aliases itemLabels
setMethod("itemLabels", signature(object = "itemMatrix"),
function(object)
colnames(object))
#' @describeIn itemMatrix-class replaces the item labels used for encoding.
setReplaceMethod("itemLabels", signature(object = "itemMatrix"),
function(object, value) {
colnames(object) <- value
object
})
#' @rdname itemMatrix-class
setGeneric("itemInfo",
function(object) standardGeneric("itemInfo"))
#' @rdname itemMatrix-class
setGeneric("itemInfo<-",
function(object, value) standardGeneric("itemInfo<-"))
#' @describeIn itemMatrix-class returns the whole item/column information data.frame including labels.
#' @aliases itemInfo
setMethod("itemInfo", signature(object = "itemMatrix"),
function(object)
object@itemInfo)
#' @describeIn itemMatrix-class replaces the item/column info by a data.frame.
#' @aliases itemInfo<-
setReplaceMethod("itemInfo", signature(object = "itemMatrix"),
function(object, value) {
object@itemInfo <- value
validObject(object)
object
})
#' @rdname itemMatrix-class
setGeneric("itemsetInfo",
function(object) standardGeneric("itemsetInfo"))
#' @rdname itemMatrix-class
setGeneric("itemsetInfo<-",
function(object, value) standardGeneric("itemsetInfo<-"))
#' @describeIn itemMatrix-class returns the item set/row information data.frame.
#' @aliases itemsetInfo
setMethod("itemsetInfo", signature(object = "itemMatrix"),
function(object)
object@itemsetInfo)
#' @describeIn itemMatrix-class replaces the item set/row info by a data.frame.
#' @aliases itemsetInfo<-
setReplaceMethod("itemsetInfo", signature(object = "itemMatrix"),
function(object, value) {
object@itemsetInfo <- value
validObject(object)
object
})
#' @rdname itemMatrix-class
#' @name coercion-itemMatrix
#' @aliases
#' coerce,matrix,itemMatrix-method
#' coerce,itemMatrix,matrix-method
#' coerce,list,itemMatrix-method
#' coerce,itemMatrix,list-method
#' coerce,itemMatrix,ngCMatrix-method
#' coerce,ngCMatrix,itemMatrix-method
#'
#' @section Coercions:
#'
#' * `as("matrix", "itemMatrix")`
#' * `as("itemMatrix", "matrix")`
#' * `as("list", "itemMatrix")`
#' * `as("itemMatrix", "list")`
#' * `as("itemMatrix", "ngCMatrix")`
#' * `as("ngCMatrix", "itemMatrix")`
#'
#' **Warning:** the `ngCMatrix` representation is transposed!
NULL
setAs("matrix", "itemMatrix",
function(from) {
## Matrix requires logical. however, it should be
## the responsibility of mode to avoid unnecessary
## coercions.
if (mode(from) != "logical") {
## check if it is a 0-1 matrix
if (mode(from) != "numeric")
stop("matrix is not logical or a numeric 0-1 matrix!")
if (any(from != 0 &
from != 1))
warning("matrix contains values other than 0 and 1! Setting all entries != 0 to 1.")
mode(from) <- "logical"
}
## we have to transpose since there is currently no
## support for "ngRMatrix" in Matrix. note that we
## can fail later as the row or column names need
## not neccessarily be unique.
itemInfo <- data.frame(labels = labels(from)[[2]],
stringsAsFactors = FALSE) ## colnames
itemsetInfo <- data.frame(itemsetID = labels(from)[[1]],
stringsAsFactors = FALSE) ## rownames
dimnames(from) <- NULL
### Note: data is a transposed ngCMatrix
new(
"itemMatrix",
data = t(as(as(as(from, "nMatrix"), "generalMatrix"), "CsparseMatrix")),
itemInfo = itemInfo,
itemsetInfo = itemsetInfo
)
})
setAs("itemMatrix", "matrix",
function(from) {
to <- as(t(from@data), "matrix")
dimnames(to) <- dimnames(from)
to
})
#' @describeIn itemMatrix-class returns a list with the dimname vectors.
setMethod("dimnames", signature(x = "itemMatrix"),
function(x) {
## NOTE: as.character is to support old data which used I()
rowLabels <- itemsetInfo(x)[["itemsetID"]]
if (!is.null(rowLabels))
rowLabels <- as.character(rowLabels)
## NOTE: as.character is to support old data which used I()
colLabels <- as.character(itemInfo(x)[["labels"]])
list(rowLabels, colLabels)
})
#' @describeIn itemMatrix-class replace the dimnames.
setReplaceMethod("dimnames", signature(x = "itemMatrix",
value = "list"),
function(x, value) {
if (any(dim(x) != sapply(value, length) & !sapply(value, is.null)))
stop("Incorrect number of labels for dimnames.")
if (!is.null(value[[1]])) {
if (ncol(itemsetInfo(x)) == 0) {
itemsetInfo(x) <- data.frame(itemsetID = value[[1]])
} else{
itemsetInfo(x)[["itemsetID"]] <- value[[1]]
}
}
if (!is.null(value[[2]])) {
itemInfo(x)[["labels"]] <- value[[2]]
}
x
})
setAs("itemMatrix", "list",
function(from)
LIST(from, decode = TRUE))
setAs("list", "itemMatrix",
function(from) {
if (!length(from))
return(new("itemMatrix"))
## some checks
if (!all(sapply(from, is.atomic)))
stop("can coerce list with atomic components only")
if (any(dup <- sapply(from, anyDuplicated))) {
warning("removing duplicated items in transactions")
for (i in which(dup > 0))
from[[i]] <- unique(from[[i]])
}
## fix for Matrix (ceeboo 2009)
from <- lapply(from, sort)
p <- sapply(from, length)
names(p) <- NULL
p <- cumsum(p)
i <- unlist(from, use.names = FALSE)
i <- factor(i)
p <- new(
"ngCMatrix",
p = c(0L, p),
i = as.integer(i) - 1L,
Dim = c(length(levels(i)), length(p))
)
new(
"itemMatrix",
data = p,
itemInfo = data.frame(labels = levels(i),
stringsAsFactors = FALSE),
itemsetInfo = data.frame(itemsetID = labels(from),
stringsAsFactors = FALSE)
)
})
setAs("itemMatrix", "ngCMatrix",
function(from) {
dimnames(from@data) <- rev(dimnames(from))
from@data
})
setAs("ngCMatrix", "itemMatrix",
function(from) {
itemInfo <- data.frame(labels = labels(from)[[1]],
stringsAsFactors = FALSE) ## rownames
itemsetInfo <- data.frame(itemsetID = labels(from)[[2]],
stringsAsFactors = FALSE) ## colnames
dimnames(from) <- list(NULL, NULL)
new(
"itemMatrix",
data = from,
itemInfo = itemInfo,
itemsetInfo = itemsetInfo
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.