# ------------------------------------------------------------------------------------------------ #
# ----------------------------- All methods for the classes. ------------------------------------- #
# ------------------------------------------------------------------------------------------------ #
# ------------------------------------------------------------------------------------------------ #
# ---------------------------- All methods for class TAMATRIX ------------------------------------ #
# ------------------------------------------------------------------------------------------------ #
#' Determine the number of items in a TAMatrix
#'
#' The length function for the TAMatrix returns the number of items in that TAMatrix.
#' @name length-TAMatrix
#' @rdname length-TAMatrix
#' @param x Object of class TAMatrix
#' @aliases length-TAMatrix length,TAMatrix-method
#' @return Number of items within the TAMatrix
#' @export
#' @importFrom grDevices as.raster colorRampPalette
#' @importFrom graphics layout rasterImage text axis
#' @import methods
#' @include allClasses.R
setMethod("length", "TAMatrix", function(x) {
x@dim[1]
})
#' Definition of print method for TAMatrix
#'
#' The print function prints out all items of the TAMatrix with their respective counts. They are
#' sorted in a descending order by their frequency.
#' @name print-TAMatrix
#' @rdname print-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @param descending Logical value per default TRUE.
#' @aliases print-TAMatrix print,TAMatrix-method
#' @return The frequent itemsets ordererd by their occurence
setMethod("print", "TAMatrix", function(x, descending = TRUE) {
#collect all itemnames, print them by their frequency in a descending order
print(data.frame(frequency = sort(rowSums(x@data), decreasing = descending)))
})
#' Definition of show method for TAMatrix
#'
#' The show function prints out the number of items in the TAMatrix
#' @name show-TAMatrix
#' @rdname show-TAMatrix
#' @export
#' @param object Object of class TAMatrix
#' @aliases show-TAMatrix show,TAMatrix-method
#' @return Short message stating the number of items.
setMethod("show", "TAMatrix", function(object) {
n <- length(object)
cat("Found", n, "items. Use the print() to display\n")
})
#' Summary for TAMatrix objects
#'
#' The summary function gives general information about the TAMatrix such as the density or the
#' distribution of the length of the itemsets
#' @name summary-TAMatrix
#' @rdname summary-TAMatrix
#' @export
#' @param object Object of class TAMatrix
#' @aliases summary-TAMatrix summary,TAMatrix-method
#' @return Summary information about the TAMatrix
setMethod("summary", signature(object = "TAMatrix"), function(object) {
#matrix density
TAM.density <- sum(colSums(object@data)) / (nrow(object@data) * ncol(object@data))
TAM.density <- round(TAM.density, 4)
#Overview over transactions matrix
cat("\n")
cat("Transaction database in binary sparse matrix representation \n with",
nrow(object@data), "rows (items) and \n",
ncol(object@data), "columns (itemsets/transactions) and \n a density of",
TAM.density, "(sparsity:", paste0(1 - TAM.density, ")"))
cat("\n")
#top 8 most frequent items
cat("\n")
cat("Most frequent items: \n" )
print(sort(rowSums(object@data), decreasing = T)[1:8])
cat("\n")
#distribution of itemset lengths
cat("Distribution of itemset length:\n")
print(table(colSums(object@data)))
cat("\n")
#summary statistics on itemset lengths
print(summary(colSums(object@data)))
cat("\n")
})
#' Plot TAMatrix objects
#'
#' The plot function for TAMatrix creates a histogram of the itemset lengths
#' @name plot-TAMatrix
#' @rdname plot-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases plot-TAMatrix plot,TAMatrix-method
#' @importFrom utils data
#' @importFrom ggplot2 qplot
#' @importFrom ggplot2 aes element_text geom_bar geom_point ggplot labs scale_color_gradient theme scale_x_continuous
setMethod("plot", signature(x = "TAMatrix"), function(x) {
if (length(x) <= 0) {
stop("Object must contain at least one item")
} else {
#determine maximum length of itemsets to obtain number breakpoints
max.itemlength <- max(colSums(x@data))
hist(colSums(x@data),
breaks = max.itemlength + 1,
main = "Distribution of itemset lengths", xlab = "Itemset length",
col = "lightblue")
}
})
#' Plot an TAMatrix object with ggplot2
#'
#' The qplot function for TA Matrix does give a histgram of the itemset length.
#' @name qplot-TAMatrix
#' @rdname qplot-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases qplot-TAMatrix qplot,TAMatrix-method
#' @return histogram of the length of the itemsets within the TA Matrix.
#' @export
#' @importFrom arules items support
setMethod("qplot", signature(x = "TAMatrix"), function(x) {
#dataframe needed for ggplot
df <- data.frame(itemset_length = colSums(x@data))
ggplot(df, aes(df$itemset_length)) +
geom_bar() +
labs(title = "Histogram of itemset lengths", x = "Itemset length") +
theme(plot.title = element_text(hjust = 0.5))
})
#' Export the item names for a TAMatrix
#' @name items-TAMatrix
#' @rdname items-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases items-TAMatrix items,TAMatrix-method
#' @return Names of all items in TAMatrix.
setMethod("items", signature = signature(x = "TAMatrix"),
function(x) {
return(rownames(x@data))
})
#' Return the sum of each column for the underlying matrix within an TAMatrix
#'
#' In the matrix underlying the TAMatrix the rows represent the items and the columns represent
#' the itemsets. Here the sums of all columns should be calculated that are the number of items
#' for each itesmet.
#' @name colSums-TAMatrix
#' @rdname colSums-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases colSums-TAMatrix colSums,TAMatrix-method
#' @return numeric vector containing the sum of each column of the TAMatrix
setMethod("colSums", signature = signature(x = "TAMatrix"),
function(x) {
return(colSums(x@data))
})
#' Return the row Sums for the underlying matrix within an TAMatrix.
#'
#' In the matrix underlying the TAMatrix the rows represent the items and the columns to represent
#' the itemsets. Here the sums of each row should be calculated that are the number of occurences
#' of each item in the different transactions.
#' @name rowSums-TAMatrix
#' @rdname rowSums-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases rowSums-TAMatrix rowSums,TAMatrix-method
#' @return numeric vector containing the sum of each row of the TAMatrix
setMethod("rowSums", signature = signature(x = "TAMatrix"),
function(x) {
return(rowSums(x@data))
})
#' Return the number of columns of underlying matrix in an TAMatrix.
#'
#' This number does represent the number of itemsets within that TAMatrix
#' @name ncol-TAMatrix
#' @rdname ncol-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases ncol-TAMatrix ncol,TAMatrix-method
#' @return number of columns / itemsets in the TAMatrix
setMethod("ncol", signature = signature(x = "TAMatrix"),
function(x) {
return(ncol(x@data))
})
#' Return the number of rows of underlying matrix in an TAMatrix.
#'
#' This number does represent the number of items within that TAMatrix
#' @name nrow-TAMatrix
#' @rdname nrow-TAMatrix
#' @export
#' @param x Object of class TAMatrix
#' @aliases nrow-TAMatrix nrow,TAMatrix-method
#' @return number of rows / items in the TAMatrix
setMethod("nrow", signature = signature(x = "TAMatrix"),
function(x) {
return(nrow(x@data))
})
#' Subsetting of TAMatrix, FIMatrix and Rules class.
#' @name select
#' @rdname select
#' @export
#' @param x Object to subset
#' @param i Either rows represented by their row number or logical vector of length
#' number of rows
#' @param j Either columns represented by their column number or logical vector of length
#' number of columns
#' @return A character vector containing the names of the items
setGeneric("select", function(x, i, j) standardGeneric("select"))
#' Subsetting of an TAMatrix
#'
#' An TAMatrix does contain the matrix of all transactions as well as the dimensions of that
#' matrix and the names of all items. Therefore, all these parts are logically connected and have
#' to be changed when the matrix is subsetted.
#' @name select-TAMatrix
#' @rdname select-TAMatrix
#' @param x Object of class TAMatrix
#' @param i Either the rows represented by their row number or a logical vector of length number of
#' row of TAMatrix. If missing or NULL all rows are selected.
#' @param j Either the columns represented by their columns numbers or logical vector of length
#' number of columns in TAMatrix. If missing or NULL all columns are selected.
#' @aliases select-TAMatrix select,TAMatrix-method
#' @return subsetted TAMatrix
setMethod("select", signature = signature(x = "TAMatrix"),
function(x, i, j) {
# Make some sanity checks on i, j.
if (!(missing(i) || is.null(i))) {
if (is.logical(i)) {
if (length(i) > nrow(x)) {
stop(paste('Logical subscript of length',
length(i), "too long for TAMatrix with", nrow(x), "rows"))
}
} else {
if (is.numeric(i)) {
if (any(!(i %in% 1:nrow(x)))) {
stop(paste("Subscript is too long. (", paste(i[!i %in% 1:nrow(x)], collapse = ', '),
") cannot be subsetted from TAMatrix with ", nrow(x), ' rows', sep = ''))
}
}
}
}
if (!(missing(j) || is.null(j))) {
if (is.logical(j)) {
if (length(j) > ncol(x)) {
stop(paste('Logical subscript of length', length(j), "too long for TAMatrix with",
ncol(x), "columns"))
}
} else {
if (is.numeric(j)) {
if (any(!(j %in% 1:ncol(x)))) {
stop(paste("Subscript is too long. (", paste(j[!j %in% 1:ncol(x)], collapse = ', '),
") cannot be subsetted from TAMatrix with ", ncol(x), ' columns', sep = ''))
}
}
}
}
# If i or j are missing, all rows / columns should be selected.
if (missing(i) || is.null(i)) {
if (is.logical(j)) {
j <- which(j)
}
return(new('TAMatrix',
data = x@data[, j, drop = FALSE],
dim = c(nrow(x@data), length(j)),
items = x@items))
}
if (missing(j) || is.null(j)) {
if (is.logical(i)) {
i <- which(i)
}
return(new('TAMatrix',
data = x@data[i, , drop = FALSE],
dim = c(length(i), ncol(x@data)),
items = x@items[i, drop = FALSE]))
}
if ((missing(i) || is.null(i)) && (missing(j) || is.null(j))) {
return(x)
}
# If i or j is logical than make it to an integer of the true positions
if (is.logical(i)) {
i <- which(i)
}
if (is.logical(j)) {
j <- which(j)
}
return(new('TAMatrix',
data = x@data[i, j, drop = FALSE],
dim = c(length(i), length(j)),
items = x@items[i, drop = FALSE]))
})
# ------------------------------------------------------------------------------------------------ #
# -------------------------------- All methods for class FIMatrix -------------------------------- #
# ------------------------------------------------------------------------------------------------ #
#' Determine the number of items in a FIMatrix
#'
#' The length function for the FIMatrix returns the number of frequent items.
#' @name length-FIMatrix
#' @rdname length-FIMatrix
#' @param x Object of class FIMatrix
#' @aliases length-FIMatrix length,FIMatrix-method
#' @return Number of frequent itemsets within the FIMatrix
#' @export
setMethod("length", "FIMatrix", function(x) {
x@data@Dim[2]
})
#' Definition of show method for FIMatrix
#'
#' The show function prints out the number of frequent itemsets in the FIMatrix.
#' @name show-FIMatrix
#' @rdname show-FIMatrix
#' @export
#' @param object Object of class FIMatrix
#' @aliases show-FIMatrix show,FIMatrix-method
#' @return Short message stating the number of frequent itemsets.
setMethod("show", "FIMatrix", function(object) {
n <- length(object)
cat("Found", n, "frequent itemset(s). Use print() to display\n")
})
#' Definition of show method for FIMatrix
#'
#' The print function prints out all the frequent itemsets sorted by their support value.
#' @name print-FIMatrix
#' @rdname print-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @param descending Logical value per default TRUE.
#' @aliases print-FIMatrix print,FIMatrix-method
#' @return The frequent itemsets ordererd by their support
#'
setMethod("print", signature(x = "FIMatrix"), function(x, descending = TRUE) {
n <- x@data@Dim[1]
output <- data.frame(items = rep(NA, n), support = rep(NA, n))
for (i in 1:n) {
output[i, 1] <- x@data@Dimnames[[1]][i]
output[i, 2] <- x@support[i]
}
#order output by support before returning (default TRUE)
output <- output[order(output$support, decreasing = descending), ]
print(output)
})
#' Summary for FI-matrices.
#'
#' The summary function gives general information about the frequent itemsets.
#' @name summary-FIMatrix
#' @rdname summary-FIMatrix
#' @export
#' @param object Object of class FIMatrix
#' @aliases summary-FIMatrix summary,FIMatrix-method
#' @return Summary information about the FImatrix
setMethod("summary", signature(object = "FIMatrix"), function(object) {
n <- length(object)
#Overview over frequent itemset matrix
cat("\n")
cat("Frequent itemsets in binary sparse matrix representation \n with",
nrow(object@data), "rows (items) and \n",
ncol(object@data), "columns (frequent itemsets)")
cat("\n")
#avoid unnecessary output when having less than 8 frequent itemsets
if (nrow(object@data) < 8) {
#top n most frequent items
cat("\n")
cat("Most frequent items: \n" )
print(sort(rowSums(object@data), decreasing = T)[1:nrow(object@data)])
cat("\n")
#probability of observing top n items in an itemset
cat("\n")
cat("Observed frequency in frequent itemsets:\n")
print(round(sort(rowSums(object@data), decreasing = T)[1:nrow(object@data)] / n, 4))
cat("\n")
} else {
#top 8 most frequent items
cat("\n")
cat("Most frequent items: \n" )
print(sort(rowSums(object@data), decreasing = T)[1:8])
cat("\n")
#probability of observing top 8 items in an itemset
cat("\n")
cat("Observed frequency in frequent itemsets:\n")
print(round(sort(rowSums(object@data), decreasing = T)[1:8] / n, 4))
cat("\n")
}
#distribution of frequent itemset lengths
cat("\n")
cat("Distribution of itemset length:\n")
print(table(colSums(object@data)))
cat("\n")
#summary statistics on frequent itemset lengths
print(summary(colSums(object@data)))
cat("\n")
#summary statistics on support measure
cat("\n")
cat("Summary of the support measure:\n")
print(summary(object@support))
cat("\n")
})
#' Plot FIMatrix objects
#'
#' The plot function creates a scatter plot with the support on the y-axis and the itemset-length on
#' the x-axis
#' @name plot-FIMatrix
#' @rdname plot-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @param pch Gerneric points function
#' @param col Colour used for plotting
#' @aliases plot-FIMatrix plot,FIMatrix-method
#' @return Scatter plot of itemsize against support
setMethod("plot", signature(x = "FIMatrix"), function(x, pch = 1, col = "red") {
if (length(x) <= 0) {
stop("Object must contain at least one itemset")
} else {
plot(colSums(x@data), x@support,
xlab = "Itemset length", ylab = "Support",
main = "Support distribution by itemset length", pch = pch, col = col, xaxt = "n")
axis(1, at = seq(1, max(colSums(x@data)), by = 1), las = 0)
}
})
#' Plot FIMatrix objects with ggplot2
#'
#' The qplot function can do a scatter plot with the support on the y-axis and the itemset-length on
#' the x-axis or a histogram of itemset lengths
#' @name qplot-FIMatrix
#' @rdname qplot-FIMatrix
#' @param x Object of class FIMatrix
#' @param col colour of data points (only scatter plot) per default "red"
#' @param alpha alpha value for scatter plot per default 0.1
#' @param type character string "hist" or "scatter" depending on wether you want a histogram or scatter plot
#' @aliases qplot-FIMatrix qplot,FIMatrix-method
#' @return Scatter plot of itemset length against support or histogram of itemset lengths
#' @export
#' @examples \donttest{
#' # Plot frequent itemsets as scatter plot.
#' sp <- qlot(FIMatrix)
#' # You can specify the color and alpha value for scatter plots.
#' sp2 <- qplot(FIMatrix, col = "blue", alpha = 1)
#' # Plot frequent itemsets as a histogram
#' hst <- qplot(FIMatrix, type = "hist")
#' }
setMethod("qplot", signature(x = "FIMatrix"),
function(x, col = "red", alpha = 0.1, type = c("hist", "scatter")) {
if (missing(type)) {
type == "scatter"
}
#set up data frame for ggplot
type = type
df <- data.frame(data = colSums(x@data), support = x@support)
if (type == "scatter") {
# Make a scatter plot
ggplot(df, aes(data, support)) +
geom_point(col = col, alpha = alpha) +
labs(x = "Itemset length", y = "support") +
scale_x_continuous(breaks = seq(1, max(colSums(x@data)), by = 1))
} else if (type == "hist") {
# Make a histogram.
ggplot(df, aes(df$data)) +
geom_bar() +
labs(title = "Histogram of itemset lengths", x = "Itemset length") +
theme(plot.title = element_text(hjust = 0.5))
} else {
stop("Please supply a valid 'type' argument 'hist' or 'scatter' ")
}
})
#' Plot a Histogram of itemset lengths for a FIMatrix object
#'
#' The hist function gives a histogram of the lengths of the itemsets.
#' @name hist-FIMatrix
#' @rdname hist-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases hist-FIMatrix hist,FIMatrix-method
#' @return Histogram of Itemsize in FIMatrix.
setMethod("hist", "FIMatrix", function(x) {
if (length(x) <= 0) {
stop("Object must contain at least one itemset")
} else {
hist(colSums(x@data),
main = "Histogram of frequent Itemsets",
xlab = "Itemset length",
col = "lightblue", xaxt = "n")
axis(1, at = seq(1, max(colSums(x@data)), by = 1), las = 0)
}
})
#' Extract the support of itemsets in class FIMatrix
#' @name support-FIMatrix
#' @rdname support-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases support-FIMatrix support,FIMatrix-method
#' @return A numeric vector containing the support values of the itemsets in the FIMatrix.
setMethod("support", "FIMatrix", function(x) {
return(x@support)
})
#' Pruning objects by quality measure(s)
#'
#' With thes function one can eliminate parts of an object that do not fulfill certain threshholds.
#' Currently this function is implemented for the Classes FIMarix and Rules.
#' @name prune
#' @rdname prune
#' @export
#' @param object Objected to be pruned.
#' @param ... measure(s) to prune by
#' @return Pruned object of same class
setGeneric("prune", function(object, ...) standardGeneric("prune"))
#' Prune method for objects of class FIMatrix
#'
#' With this function one can delete all itemsets from an FIMatrix that do not have minimal support.
#' @name prune-FIMatrix
#' @rdname prune-FIMatrix
#' @export
#' @param object Object of class FIMatrix.
#' @aliases prune-FIMatrix prune,FIMatrix-method
#' @param Support Minimal support the pruned FIMatrix should have.
#' @return Pruned object of class FIMatrix
setMethod("prune", "FIMatrix", function(object, Support) {
# Error checking
# Support should be numeric and within (0,1)
if ((!missing(Support)) && is.numeric(Support)) {
if (Support > 1 || Support < 0) {
stop("Supportort should be within (0,1). Pruning aborted.")
}
} else {
if (!missing(Support)) {
stop('The Supportort specified in Support should be numeric!. Pruning aborted.')
}
}
if (!missing(Support)) {
res <- select(object,support(object) >= Support, NULL)
return(res)
} else {
return(object)
}
})
#' Export the item names for a FIMatrix
#' @name items-FIMatrix
#' @rdname items-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases items-FIMatrix items,FIMatrix-method
#' @return Names of all items in FIMatrix.
setMethod("items", signature = signature(x = "FIMatrix"),
function(x) {
return(rownames(x@data))
})
#' Return the colSums for the underlying matrix within an FIMatrix
#'
#' In the matrix underlying the FIMatrix the rows represent the items and the columns to represent
#' the itemsets. Here the sums of all columns should be calculated that are the number of items
#' for each itesmet.
#' @name colSums-FIMatrix
#' @rdname colSums-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases colSums-FIMatrix colSums,FIMatrix-method
#' @return numeric vector containing the sum of each column of the FIMatrix
setMethod("colSums", signature = signature(x = "FIMatrix"),
function(x) {
return(colSums(x@data))
})
#' Return the row Sums for the underlying matrix within an FIMatrix
#'
#' In the matrix underlying the FIMatrix the rows represent the items and the columns to represent
#' the itemsets. Here the sums of each row should be calculated that are the number of occurences
#' of each item.
#' @name rowSums-FIMatrix
#' @rdname rowSums-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases rowSums-FIMatrix rowSums,FIMatrix-method
#' @return numeric vector containing the sum of each row of the FIMatrix
setMethod("rowSums", signature = signature(x = "FIMatrix"),
function(x) {
return(rowSums(x@data))
})
#' Return the number of columns of the underlying matrix in an FIMatrix
#'
#' This number does represent the number of itemsets within that FIMatrix
#' @name ncol-FIMatrix
#' @rdname ncol-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases ncol-FIMatrix ncol,FIMatrix-method
#' @return number of columns / itemsets in the FIMatrix
setMethod("ncol", signature = signature(x = "FIMatrix"),
function(x) {
return(ncol(x@data))
})
#' Return the number of rows of the underlying matrix in an FIMatrix
#'
#' This number does represent the number of items within that FIMatrix
#' @name nrow-FIMatrix
#' @rdname nrow-FIMatrix
#' @export
#' @param x Object of class FIMatrix
#' @aliases nrow-FIMatrix nrow,FIMatrix-method
#' @return number of rows / items in the FIMatrix
setMethod("nrow", signature = signature(x = "FIMatrix"),
function(x) {
return(nrow(x@data))
})
#' Subsetting of an FIMatrix
#'
#' An FImatrix does contain the matrix of itemsets as well as the a vector that contains the support
#' for each itemset. Therefore, both are logically connected and when a FIMatrix is subsetted column-
#' wise the supported vector has to be subsetted as well.
#' @name select-FIMatrix
#' @rdname select-FIMatrix
#' @export
#' @param x Object of class FIMatrix.
#' @param i Either the rows represented by their row number or a logical vector of length number of
#' row of FIMAtrix. If missing or NULL all rows are selected.
#' @param j Either the columns represented by their columns numbers or logical vector of length
#' number of columns in FIMatrix. If missing or NULL all columns are selected.
#' @aliases select-FIMatrix select,FIMatrix-method
#' @return subsetted FIMatrix
setMethod("select", signature = signature(x = "FIMatrix"),
function(x, i, j) {
# Make some sanity checks on i, j.
if (!(missing(i) || is.null(i))) {
if (is.logical(i)) {
if (length(i) > nrow(x)) {
stop(paste('Logical subscript of length', length(i), "too long for FIMatrix with", nrow(x), "rows"))
}
} else {
if (is.numeric(i)) {
if (any(!(i %in% 1:nrow(x)))) {
stop(paste("Subscript is too long. (", paste(i[!i %in% 1:nrow(x)], collapse = ', '),
") cannot be subsetted from FIMatrix with ", nrow(x), ' rows', sep = ''))
}
}
}
}
if (!(missing(j) || is.null(j))) {
if (is.logical(j)) {
if (length(j) > ncol(x)) {
stop(paste('Logical subscript of length', length(j), "too long for FIMatrix with",
ncol(x), "columns"))
}
} else {
if (is.numeric(j)) {
if (any(!(j %in% 1:ncol(x)))) {
stop(paste("Subscript is too long. (", paste(j[!j %in% 1:ncol(x)], collapse = ', '),
") cannot be subsetted from FIMatrix with ", ncol(x), ' columns', sep = ''))
}
}
}
}
# If the matrix does not have row or columns return an empty matrix
if (nrow(x@data) == 0 || ncol(x@data) == 0) {
return(new('FIMatrix',
data = x@data[0, 0, drop = FALSE],
support = x@support[0, drop = FALSE]))
}
# If i is missing use all rows of the input
if (missing(i) || is.null(i)) {
i <- 1:nrow(x@data)
}
# If j is missing use all columns of the input
if (missing(j) || is.null(j)) {
j <- 1:ncol(x@data)
}
# if i, j is logical replace it by the positions of the true values
if (is.logical(i)) {
i <- which(i)
i <- as.numeric(i)
}
if (is.logical(j)) {
j <- which(j)
j <- as.numeric(j)
}
if (length(i) == 0 || length(j) == 0) {
return(new('FIMatrix',
data = x@data[0, 0, drop = FALSE],
support = x@support[0, drop = FALSE]))
}
return(new('FIMatrix',
data = x@data[i, j, drop = FALSE],
support = x@support[j, drop = FALSE]))
})
# ------------------------------------------------------------------------------------------------ #
# ------------------------------ All methods for class Rules ------------------------------------- #
# ------------------------------------------------------------------------------------------------ #
#' Determine the number of rules in a Rules object
#'
#' The length function for the Rules class returns the number of rules.
#' @name length-Rules
#' @rdname length-Rules
#' @param x Object of class Rules
#' @aliases length-Rules length,Rules-method
#' @return Number of Rules in x.
#' @export
setMethod("length", "Rules", function(x) {
x@lhs@Dim[2]
})
#' Definition of show method for Rules
#'
#' The show function prints out the number of Rules
#' @name show-Rules
#' @rdname show-Rules
#' @export
#' @param object Object of class Rules
#' @aliases show-Rules show,Rules-method
#' @return Short message stating the number of Rules.
setMethod("show", "Rules", function(object) {
n <- length(object)
if (n > 0) {
cat("Found", n, "rule(s). Use print() to display\n")
} else {
cat("Found no rules. Try lowering the support and/or confidence threshold.\n")
}
})
#' Definition of show method for Rules
#'
#' The print function prints out all the rules in the object sorted by specified matrices.
#' @name print-Rules
#' @rdname print-Rules
#' @export
#' @param x Object of class Rules
#' @param maxNumConsequent The maximum length of consequents that the rules of the ouput should
#' have. In Default all rules are shown.
#' @param order_by Specifiy up to four metrics out of support, confidence, lift, leverage by which
#' the given rules should be sorted. The first one used first and son on.
#' @param decreasing Should the rules start with the smallest or highest values of the specified
#' metrics?
#' @aliases print-Rules print,Rules-method
#' @return The rules from the left hand and right hand side in the form of {It1, ... ItN} -> {ITK}
#' in a data.frame. This data.frame does have columns lhs, rhs, unnamed, support and confidence.
setMethod("print", "Rules", function(x,maxNumConsequent = 1,
order_by = NULL, decreasing = TRUE) {
if (length(x) == 0) {
return("Found no rules. Try lowering the support and/or confidence threshold.")
} else {
ExtractRules(x, maxNumConsequent = maxNumConsequent,
order_by = order_by, decreasing = decreasing)
}
})
#' Summary funtion for Rules object
#'
#' The summary for the rules object does give some general information on the quality of the rules.
#' @name summary-Rules
#' @rdname summary-Rules
#' @export
#' @param object Object of class Rules
#' @aliases summary-Rules summary,Rules-method
#' @return Summary information about the Rules
setMethod("summary", signature(object = "Rules"), function(object) {
n <- length(object)
quality <- data.frame(support = object@support,
confidence = object@confidence,
lift = object@lift,
leverage = object@leverage)
if (n > 0) {
cat("Found", n, "rule(s). Use print() to display\n")
cat("\n")
#summary statistics on quality measures
cat("\n")
cat("Summary of quality measures:\n")
print(summary(quality))
cat("\n")
} else {
return("Found no rules. Try lowering the support and/or confidence threshold.")
}
})
#' Plot a Rules object
#'
#' The plot function gives a scatter plot with the support on the x-axis, the confidence on the y-axis
#' and the lift as a color gradient.
#' @name plot-Rules
#' @rdname plot-Rules
#' @export
#' @param x Object of class Rules
#' @aliases plot-Rules plot,Rules-method
#' @return Scatter plot of support versus confidence with lift as a color gradient
setMethod("plot", "Rules", function(x) {
if (length(x) <= 0) {
stop("Object must contain at least one rule")
} else {
#color gradient function
colfunc <- colorRampPalette(c("lightblue", "blue"))
#needed for ordering
plot.df <- data.frame(support = x@support,
confidence = x@confidence,
lift = x@lift)
#ordering needed for color gradient
plot.df <- plot.df[order(plot.df$lift), ]
#main scatterplot
layout(matrix(1:2, ncol = 2), widths = c(2, 1), heights = c(1, 1))
plot(plot.df$support,
plot.df$confidence,
xlab = "Support", ylab = "Confidence",
pch = 20, col = colfunc(length(x)))
#gradient legend
legend.raster <- as.raster(matrix(rev(colfunc(length(x)))), ncol = 1)
plot(c(0, 2),
c(0, round(max(x@lift), 2)),
type = "n", axes = F, xlab = "", ylab = "", main = "Lift", adj = 0.225)
text(x = 1.5,
y = seq(0, max(x@lift), l = 3),
labels = seq(round(min(x@lift), 2), round(max(x@lift), 2), l = 3))
rasterImage(legend.raster, 0, 0, 1, round(max(x@lift), 2))
}
})
#' Plot a Rules object wiht ggplot2
#'
#' The plot function gives a scatter plot with the support on the x-axis, the confidence on the y-axis
#' and the lift as a color gradient.
#' @name qplot-Rules
#' @rdname qplot-Rules
#' @export
#' @param x Object of class Rules
#' @aliases qplot-Rules qplot,Rules-method
#' @return Scatter qplot of support versus confidence, lift as a color gradient..
setMethod("qplot", "Rules", function(x) {
quality.df <- data.frame(support = x@support,
confidence = x@confidence,
lift = x@lift,
leverage = x@leverage)
ggplot(quality.df, aes(x = support, y = confidence, color = lift)) +
geom_point() +
scale_color_gradient(low = "lightblue", high = "blue")
})
#' Extract confidence from object
#' @name confidence
#' @rdname confidence
#' @export
#' @param object Object of Class Rules
#' @return Vector of confidence values from all entities of the objects.
setGeneric("confidence", function(object) {
standardGeneric("confidence")
})
#' Extract confidence of all rules within a Rules object.
#' @name confidence-Rules
#' @rdname confidence-Rules
#' @export
#' @param object Object of class Rules
#' @aliases confidence-Rules confidence,Rules-method
#' @return Vector of confidence values from all Rules in x.
setMethod("confidence", "Rules", function(object) {
object@confidence
})
#' Extract lift from object
#' @name lift
#' @rdname lift
#' @export
#' @param object Object of Class Rules
#' @return Vector of lift values from all entities of the objects.
setGeneric("lift", function(object) standardGeneric("lift"))
#' Extract lift of all rules within a Rules object.
#' @name lift-Rules
#' @rdname lift-Rules
#' @export
#' @param object Object of class Rules
#' @aliases lift-Rules lift,Rules-method
#' @return Vector of lift values from all Rules in x.
setMethod("lift", "Rules", function(object) {
object@lift
})
#' Extract leverage from object
#' @name leverage
#' @rdname leverage
#' @export
#' @param object Object of Class Rules
#' @return Vector of leverage values from all entities of the objects.
setGeneric("leverage", function(object) standardGeneric("leverage"))
#' Extract leverage of all rules within a Rules object
#' @name leverage-Rules
#' @rdname leverage-Rules
#' @export
#' @param object Object of class Rules
#' @aliases leverage-Rules leverage,Rules-method
#' @return Vector of leverage values from all Rules in x.
setMethod("leverage", "Rules", function(object) {
object@leverage
})
#' Extract the support of itemsets in class Rules
#' @name support-Rules
#' @rdname support-Rules
#' @export
#' @param x Object of class Rules
#' @aliases support-Rules support,Rules-method
#' @return A numeric vector containing the support values of Rules.
setMethod("support", "Rules", function(x) {
x@support
})
#' Extract FIMatrix object from class
#' @name extract
#' @rdname extract
#' @export
#' @param object Object of Class Rules
#' @return Object of Class FIMatrix
setGeneric("extract", valueClass = "FIMatrix", function(object) {
standardGeneric("extract")
})
#' Extract the FIMatrix from a rules object
#' @name extract-Rules
#' @rdname extract-Rules
#' @export
#' @param object Object of class Rules
#' @aliases extract-Rules extract,Rules-method
#' @return FIMatrix containing the frequent itemsets based on which the rules where calculated.
setMethod("extract", "Rules", function(object) {
object@FrequentItemsets
})
#' Prune method for objects of class Rules
#'
#' With this function one can delete all itemsets from an Rules that do not have minimal support,
#' confidence lift or leverage.
#' @name prune-Rules
#' @rdname prune-Rules
#' @export
#' @aliases prune-Rules prune,Rules-method
#' @param object Object of class Rules
#' @param Support Minimal support the output rules should have.
#' @param Confidence Minimal confidence the output rules should have.
#' @param Lift Minimal Lift the output rules should have.
#' @param Leverage Minimal Leverage the output rules should have.
#' @param inv_Lift Pruning based on minimal or maximal lift?
#' @param inv_lev Pruning based on minimal or maximal leverage?
#' @return Pruned object of class Rules
setMethod("prune", "Rules", function(object, Support, Confidence, Lift, Leverage,
inv_Lift = FALSE, inv_lev = FALSE) {
# Error checking
# Support should be numeric and within (0,1)
if ((!missing(Support)) && is.numeric(Support)) {
if (Support > 1 || Support < 0) {
stop("Supportort should be within (0,1). Pruning aborted.")
}
} else {
if (!missing(Support)) {
stop('The Supportort specified in Support should be numeric!. Pruning aborted.')
}
}
# Confidenceidence should be numeric and within (0,1)
if ((!missing(Confidence)) && is.numeric(Confidence)) {
if (Confidence > 1 || Confidence < 0) {
stop("Confidenceidence should be within (0,1). Pruning aborted.")
}
} else {
if (!missing(Confidence)) {
stop('The Confidenceidence specified in Support should be numeric!. Pruning aborted.')
}
}
# Lift should be numeric
if ((!missing(Lift)) && !is.numeric(Lift)) {
stop("Lift should be numeric. Pruning aborted")
}
# Leverage should be numeric
if ((!missing(Leverage)) && !is.numeric(Leverage)) {
stop("Leverage should be numeric. Pruning aborted")
}
# If non of the paramters is specified all colums / itemsets should be returned.
selection <- rep(TRUE, ncol(object))
if (!missing(Support)) {
selection <- selection & support(object) >= Support
}
if (!missing(Confidence)) {
selection <- selection & confidence(object) >= Confidence
}
if (!missing(Lift)) {
if (!inv_Lift) {
selection <- selection & lift(object) >= Lift
} else {
selection <- selection & lift(object) <= Lift
}
}
if (!missing(Leverage)) {
if (!inv_lev) {
selection <- selection & leverage(object) >= Leverage
} else {
selection <- selection & leverage(object) <= Leverage
}
}
res <- select(object,NULL, selection)
return(res)
})
#' Return the sum of each row for the for either the rhs or lhs of a rule
#'
#' Underlying a rules there is a right-hand side and a left-hand side. Both are stored as matrices.
#' With this function one can calculated the sum of each row, that is the respective number
#' of occurences of each item in all transactions for either the left-hand side or the right-hand side.
#' @name rowSums-Rules
#' @rdname rowSums-Rules
#' @export
#' @param x Object of class Rules
#' @param lhs if true the sum of each rwo of the left-hand side are calculated, else the sum of
#' each row of the right-hand side.
#' @aliases rowSums-Rules rowSums,Rules-method
#' @return numeric vector containing the sum of each row of either the rhs or the lhs.
setMethod("rowSums", signature = signature(x = "Rules"),
function(x, lhs = TRUE) {
if (lhs) {
return(rowSums(x@lhs))
} else {
return(rowSums(x@rhs))
}
})
#' Return the sum of each column for the for either the rhs or lhs of a rule
#'
#' Underlying a rules there is a right-hand side and a left-hand side. Both are stored as matrices.
#' With this function one can calculated the sum of each column, that is the respective number
#' of items within each itemset for either the left-hand side or the right-hand side.
#' @name colSums-Rules
#' @rdname colSums-Rules
#' @export
#' @param x Object of class Rules
#' @param lhs if true the sum of each column of the left-hand side are calculated, else the sum of
#' each column of the right-hand side.
#' @aliases colSums-Rules colSums,Rules-method
#' @return numeric vector containing the sum of each column of either the rhs or the lhs.
setMethod("colSums", signature = signature(x = "Rules"),
function(x, lhs = TRUE) {
if (lhs) {
return(colSums(x@lhs))
} else {
return(colSums(x@rhs))
}
})
#' Export the item names for a Rules object
#' @name items-Rules
#' @rdname items-Rules
#' @export
#' @param x Object of class Rules
#' @aliases items-Rules items,Rules-method
#' @return Vector containing the names of all items in Rules.
setMethod("items", signature = signature(x = "Rules"),
function(x) {
return(rownames(x@lhs))
})
#' Return the number of columns of underlying matrix in an Rules object
#'
#' Although a Rules object does have left-hand side and a right hand-side the number of columns for
#' both does represent the number rules and therefore should be the same for both sides. This functions
#' simply uses the left-hand sides as proxy.
#' @name ncol-Rules
#' @rdname ncol-Rules
#' @export
#' @param x Object of class Rules
#' @aliases ncol-Rules ncol,Rules-method
#' @return number of columns / Rules in the Rules object.
setMethod("ncol", signature = signature(x = "Rules"),
function(x) {
return(ncol(x@lhs))
})
#' Return the number of rows of underlying matrix in an Rules object.
#'
#' Although a Rules object does have left-hand side and a right hand-side the number of rows for
#' both does represent the number items and therefore should be the same for both sides. This functions
#' simply uses the left-hand sides as proxy.
#' @name nrow-Rules
#' @rdname nrow-Rules
#' @export
#' @param x Object of class Rules
#' @aliases nrow-Rules nrow,Rules-method
#' @return number of rows / total number of possible items in the Rules object.
setMethod("nrow", signature = signature(x = "Rules"),
function(x) {
return(nrow(x@lhs))
})
#' Subsetting of a Rules object.
#'
#' A Rules object does contain the matrix of itemsets as well as the a vectors that contains the support,
#' confidence, lift and leverage for all rules.Therefore, both are logically connected and
#' when a Rules is subsetted column- wise the other vectors are subsetted as well.
#' @name select-Rules
#' @rdname select-Rules
#' @export
#' @param x Object of class Rules
#' @param i Either the rows represented by their row number or a logical vector of length number of
#' row of Rules. If i is missing or NULL all rows are selected.
#' @param j Either the columns represented by their columns numbers or logical vector of length
#' number of columns in Rules. If j is missing or NULL all columns are selected.
#' @aliases select-Rules select,Rules-method
#' @return subsetted Rules object.
setMethod("select", signature = signature(x = "Rules"),
function(x, i, j) {
# Make some sanity checks on i, j.
if (!(missing(i) || is.null(i))) {
if (is.logical(i)) {
if (length(i) > nrow(x)) {
stop(paste('Logical subscript of length', length(i), "too long for Rules with", nrow(x), "rows"))
}
} else {
if (is.numeric(i)) {
if (any(!(i %in% 1:nrow(x)))) {
stop(paste("Subscript is too long. (", paste(i[!i %in% 1:nrow(x)], collapse = ', '),
") cannot be subsetted from Rules with ", nrow(x), ' rows', sep = ''))
}
}
}
}
if (!(missing(j) || is.null(j))) {
if (is.logical(j)) {
if (length(j) > ncol(x)) {
stop(paste('Logical subscript of length', length(j), "too long for Rules with",
ncol(x), "columns"))
}
} else {
if (is.numeric(j)) {
if (any(!(j %in% 1:ncol(x)))) {
stop(paste("Subscript is too long. (", paste(j[!j %in% 1:ncol(x)], collapse = ', '),
") cannot be subsetted from Rules with ", ncol(x), ' columns', sep = ''))
}
}
}
}
# If the matrix does not have row or columns return an empty matrix
if (nrow(x@lhs) == 0 || ncol(x@lhs) == 0 || nrow(x@rhs) == 0 || ncol(x@rhs) == 0) {
return(new('Rules',
lhs = x@lhs[0,0,drop = FALSE],
rhs = x@rhs[0,0,drop = FALSE],
support = x@support[0, drop = FALSE],
confidence = x@confidence[0, drop = FALSE],
lift = x@lift[0, drop = FALSE],
leverage = x@leverage[0, drop = FALSE],
itemsetID = x@itemsetID[0, drop = FALSE],
FrequentItemsets = x@FrequentItemsets))
}
# If i is missing use all rows of the input
if (missing(i) || is.null(i)) {
i <- 1:nrow(x@lhs)
}
# If j is missing use all columns of the input
if (missing(j) || is.null(j)) {
j <- 1:ncol(x@lhs)
}
# if i, j is logical replace it by the positions of the true values
if (is.logical(i)) {
i <- which(i)
i <- as.numeric(i)
}
if (is.logical(j)) {
j <- which(j)
j <- as.numeric(j)
}
if (length(i) == 0 || length(j) == 0) {
return(new('Rules',
lhs = x@lhs[0,0,drop = FALSE],
rhs = x@rhs[0,0,drop = FALSE],
support = x@support[0, drop = FALSE],
confidence = x@confidence[0, drop = FALSE],
lift = x@lift[0, drop = FALSE],
leverage = x@leverage[0, drop = FALSE],
itemsetID = x@itemsetID[0, drop = FALSE],
FrequentItemsets = x@FrequentItemsets))
}
return(new('Rules',
lhs = x@lhs[i,j,drop = FALSE],
rhs = x@rhs[i,j,drop = FALSE],
support = x@support[j, drop = FALSE],
confidence = x@confidence[j, drop = FALSE],
lift = x@lift[j, drop = FALSE],
leverage = x@leverage[j, drop = FALSE],
itemsetID = x@itemsetID[j, drop = FALSE],
FrequentItemsets = x@FrequentItemsets))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.