R/is.maximal.R

#######################################################################
# arules - Mining Association Rules and Frequent Itemsets
# Copyright (C) 2011-2015 Michael Hahsler, Christian Buchta,
#			Bettina Gruen and Kurt Hornik
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


#' Find Maximal Itemsets
#'
#' Provides the generic function `is.maximal()` and methods for
#' finding maximal itemsets. Maximal frequent itemsets are used as a concise
#' representation of frequent itemsets. An itemset is maximal in a set if no
#' proper superset of the itemset is contained in the set (Zaki et al., 1997).
#'
#' Maximally frequent itemsets can also be mined directly using
#' [apriori()] or [eclat()] with target "maximally frequent
#' itemsets".
#'
#' We define here maximal rules, as the rules generated by maximal itemsets.
#'
#' @aliases is.maximal 
#' @family postprocessing
#' @family associations functions
#' 
#' @param x the set of [itemsets], [rules] or an [itemMatrix] object.
#' @param ... further arguments.
#' @return a logical vector with the same length as `x` indicating for
#' each element in `x` if it is a maximal itemset.
#' @author Michael Hahsler
#' @references 
#' Mohammed J. Zaki, Srinivasan Parthasarathy, Mitsunori Ogihara,
#' and Wei Li (1997). _New algorithms for fast discovery of association
#' rules_. Technical Report 651, Computer Science Department, University of
#' Rochester, Rochester, NY 14627.
#' @keywords models
setGeneric("is.maximal",
  function(x, ...)
    standardGeneric("is.maximal"))

#' @rdname is.maximal
setMethod("is.maximal", signature(x = "itemMatrix"),
  function(x) {
    u <- unique(x)
    m <-
      .Call(R_pncount, u@data, u@data, TRUE, TRUE, FALSE) == 1
    i <- match(x, u)
    structure(m[i], names = labels(x))
  })

#' @rdname is.maximal
setMethod("is.maximal", signature(x = "itemsets"),
  function(x)
    is.maximal(items(x)))

#' @rdname is.maximal
setMethod("is.maximal", signature(x = "rules"),
  function(x)
    is.maximal(items(x)))


## old code w/o prefix tree
#setMethod("is.maximal", signature(x = "itemsets"),
#    function(x, blocksize = 200) is.maximal(items(x), blocksize)
#)

#setMethod("is.maximal", signature(x = "itemMatrix"),
#    function(x, blocksize = 200) {
#        ## rowSums(is.subset(x, x, proper = TRUE)) == 0
#
#        ## for large x we use blocks now
#        ## memory requirements for is.subset (see is.super.R)
#        ## is approx. nx^2 * (8 + 4) byte
#
#        nx <- length(x)
#        blocksize <- floor(blocksize * 1024 * 1024 / nx / 12)
#
#        if(blocksize < 1)
#        stop("Length of x is to large. Increase usable memory blocksize!")
#
#        ## do it in one run
#        if(nx <= blocksize)
#        return(rowSums(is.subset(x, proper = TRUE)) == 0)
#
#        ## do it in blocks
#        ismax <- logical(nx)
#
#        blockStart <- 1
#        while(blockStart < nx) {
#            cat(blockStart,"\n")
#            blockEnd <- min(blockStart+blocksize, nx)
#            ismax[blockStart:blockEnd] <-
#            rowSums(is.subset(x[blockStart:blockEnd], x,
#                    proper = TRUE)) == 0
#            blockStart <- blockEnd
#        }
#
#        return(ismax)
#    })
#
mhahsler/arules documentation built on March 19, 2024, 5:45 p.m.