R/aab-helper.R
In partitionComparison: Implements Measures for the Comparison of Two Partitions

Documented in projectionNumber registerPartitionVectorSignatures

#' Compute the length of the overlap of two sets
#' 
#' Given two partitions (p, q) represented as vectors of cluster ids,
#' compute the overlap between clusters i and j.
#' 
#' @param p Partition \eqn{P}
#' @param q Partition \eqn{Q}
#' @param i Cluster id of \eqn{P}
#' @param j Cluster id of \eqn{Q}
#' 
#' @template author
#' 
#' @noRd
setOverlap <- function(p, q, i, j) sum(p == i & q == j)


#' Compute the projection number of two partitions
#' 
#' Given two partitions (p, q) represented as vectors of cluster ids,
#' compute the projection number which is the sum of maximum
#' cluster overlaps for all clusters of \eqn{P} to any cluster of \eqn{Q}.
#' 
#' @param p Partition \eqn{P}
#' @param q Partition \eqn{Q}
#' 
#' @template author
#' @examples 
#' isTRUE(all.equal(projectionNumber(c(0, 0, 0, 1, 1), c(0, 0, 1, 1, 1)), 4))
#' 
#' @seealso \code{\link{dongensMetric}}
#' 
#' @export
projectionNumber <- function(p, q) {
  # Gets the (arbitrary) cluster ids
  clusters_p <- unique(p)
  clusters_q <- unique(q)
  
  # Computes the sum over the maximum overlaps
  sum(sapply(clusters_p, 
             function(i) max(sapply(clusters_q, 
                                    function(j) setOverlap(p, q, i, j)))))
}


#' Entropy
#' 
#' Compute the Shannon entropy
#' \deqn{-\sum_{i} p_i \log_b p_i}
#' 
#' @section Hint:
#' This method is used internally for measures based on information theory
#' 
#' @param x A probability distribution
#' @param log_base Optional base of the logarithm (default: \eqn{e})
#' @template author
#' @name entropy
#' @examples 
#' isTRUE(all.equal(entropy(c(.5, .5)), log(2)))
#' isTRUE(all.equal(entropy(c(.5, .5), 2), 1))
#' isTRUE(all.equal(entropy(c(.5, .5), 4), .5))
#' 
#' @export
setGeneric("entropy", function(x, log_base) standardGeneric("entropy"))

#' @rdname entropy
setMethod("entropy", signature(x="numeric", log_base="numeric"), 
          function(x, log_base) {
            if (!isTRUE(all.equal(sum(x), 1)))
              stop("x is not a distribution that sums to 1")
            
            -sum(sapply(x[x > 0], function(p) p * log(p, base = log_base)))
          })

#' @examples 
#' # Entropy of a partition
#' isTRUE(all.equal(entropy(new("Partition", c(0, 0, 1, 1, 1))), entropy(c(2/5, 3/5))))
#' 
#' @describeIn entropy Entropy of a partition represented by \code{x}
setMethod("entropy", signature(x="Partition", log_base="numeric"), 
          function(x, log_base) entropy(as.vector(table(x) / length(x)), log_base))

#' @rdname entropy
setMethod("entropy", signature(x="ANY", log_base="missing"), 
          function(x, log_base=exp(1)) entropy(x, log_base=exp(1)))


# Internal function to create a 'proxy' function for the given
# function. The result is a new function with exactly the same
# signature but the first two arguments are replaced by instances
# of Partition.
.createProxyFunction <- function(f) {
  f_formals <- formals(f)  # Get the formals for the method,...
  formal_names <- names(f_formals)  # ...and its names.
  # Create a list of symbols from the formals (to be used in 'do.call')
  formal_symbol_list <- lapply(formal_names, as.symbol)
  
  # Create the new proxy function
  if (length(formal_symbol_list) > 2) {
    func <- function() {
      # Cast to partition
      x <- new("Partition", get(formal_names[1]))
      y <- new("Partition", get(formal_names[2]))
      do.call(f, c(list(x, y), formal_symbol_list[3:length(formal_symbol_list)]))
    }
  } else {
    func <- function() {
      # Cast to partition
      x <- new("Partition", get(formal_names[1]))
      y <- new("Partition", get(formal_names[2]))
      f(x, y)
    }
  }
  
  # Finally, set the 'original' formals to the proxy function
  formals(func) <- f_formals
  
  func
}


#' Make comparison measures usable with any vectors
#' 
#' The comparison measures are defined to use the class \linkS4class{Partition} 
#' as parameters. If you do not want to explicitly convert an arbitrary vector 
#' of class labels (probably as a result from another package's algorithm) into
#' a \linkS4class{Partition} instance, calling this function will create methods
#' for all measures that allow "ANY" input which is implicitly converted to
#' \linkS4class{Partition}.
#' 
#' @param e The environment to register the methods in 
#' (mostly \code{environment()} is fine)
#' 
#' @template author
#' @examples 
#' library(partitionComparison)
#' randIndex(new("Partition", c(0, 0, 0, 1, 1)), new("Partition", c(0, 0, 1, 1, 1)))
#' # [1] 0.6
#' \dontrun{randIndex(c(0, 0, 0, 1, 1), c(0, 0, 1, 1, 1))}
#' # Error in (function (classes, fdef, mtable) :
#' # unable to find an inherited method for function 'randIndex' for signature '"numeric", "numeric"'
#' registerPartitionVectorSignatures(environment())
#' randIndex(c(0, 0, 0, 1, 1), c(0, 0, 1, 1, 1))
#' # [1] 0.6
#' 
#' @export
registerPartitionVectorSignatures <- function(e) {
  # Get all S4 method names defined for "Partition"
  method_info <- attr(.S4methods(class="Partition"), "info")
  partition_methods <- unique(method_info[method_info$from == "partitionComparison",]$generic)

  for (method_name in partition_methods) {
    # Get all methods that have a Partition class in their signature and the given name
    methods_to_overload <- findMethods(method_name, classes = "Partition")

    for (method_definition in methods_to_overload) {
      method_signature <- method_definition@target
      
      # If the first two parameters are of type "Partition", we assume it is
      # a measure and register a method with an altered signature 
      # ("Partition" is replaced by "vector") and wrap the call to the 'original'
      # method.
      if (length(method_signature) >= 2 & 
          method_signature[1] == "Partition" & 
          method_signature[2] == "Partition") {
        method_signature[1] <- method_signature[2] <- "vector"

        setMethod(method_name, method_signature, 
                  .createProxyFunction(method_definition), where = e)
      }
    }
  }
}


#' Compare two partitions with all measures
#' 
#' Compute the comparison between two partitions for all available measures.
#' 
#' 
#' @param p The partition \eqn{P}
#' @param q The partition \eqn{Q}
#' 
#' @return 
#' Instance of \code{\link{data.frame}} with columns \code{measure} and \code{value}
#' 
#' @section Warning:
#' This method will identify every generic S4 method that has a signature 
#' \code{"Partition", "Partition"} (including signatures with following \code{"missing"} 
#' parameters, e.g. \code{"Partition", "Partition", "missing"}) as a partition 
#' comparison measure, \strong{except} this method itself (otherwise: infinite 
#' recursion). This means one has to take care when defining other methods with the same
#' signature in order not to produce unwanted side-effects!
#' 
#' @template author
#' @name compareAll
#' 
#' @examples 
#' compareAll(new("Partition", c(0, 0, 0, 1, 1)), new("Partition", c(0, 0, 1, 1, 1)))
#' \dontrun{
#'                         measure       value
#'  1            adjustedRandIndex 0.166666667
#'  2                     baulieu1 0.760000000
#'  3                     baulieu2 0.040000000
#'  4  classificationErrorDistance 0.200000000
#'  5                  czekanowski 0.500000000
#'  6                dongensMetric 2.000000000
#'  7                 fagerMcGowan 0.250000000
#'  8          folwkesMallowsIndex 0.500000000
#'  9              gammaStatistics 0.166666667
#'  10              goodmanKruskal 0.333333333
#'  11               gowerLegendre 0.750000000
#'  12                      hamann 0.200000000
#'  13          jaccardCoefficient 0.333333333
#'  14                  kulczynski 0.500000000
#'  15                  larsenAone 0.800000000
#'  16                 lermanIndex 0.436435780
#'  17                mcconnaughey 0.000000000
#'  18            minkowskiMeasure 1.000000000
#'  19                mirkinMetric 8.000000000
#'  20           mutualInformation 0.291103166
#'  21       normalizedLermanIndex 0.166666667
#'  22 normalizedMutualInformation 0.432538068
#'  23                     pearson 0.006944444
#'  24                      peirce 0.166666667
#'  25                   randIndex 0.600000000
#'  26              rogersTanimoto 0.428571429
#'  27                   russelRao 0.200000000
#'  28               rvCoefficient 0.692307692
#'  29                sokalSneath1 0.583333333
#'  30                sokalSneath2 0.200000000
#'  31                sokalSneath3 0.333333333
#'  32      variationOfInformation 0.763817002
#'  33                    wallaceI 0.500000000
#'  34                   wallaceII 0.500000000
#' }
#' 
#' @export
setGeneric("compareAll", function(p, q) standardGeneric("compareAll"))

#' @describeIn compareAll Compare given two \code{\linkS4class{Partition}} instances
setMethod("compareAll", signature(p="Partition", q="Partition"), 
          function(p, q) {
            result.df <- data.frame(measure=character(), value=numeric(), stringsAsFactors = FALSE)
  
            method_info <- attr(.S4methods(class="Partition"), "info")
            partition_methods <- unique(method_info[method_info$from == "partitionComparison",]$generic)
            
            for (method_name in partition_methods) {
              if (method_name == "compareAll") next
              # Get all methods that have a Partition class in their signature and 
              # the given name
              methods_to_overload <- findMethods(method_name, classes = "Partition")
              
              for (method_definition in methods_to_overload) {
                method_signature <- method_definition@target
                
                # If the first two parameters are of type "Partition", we assume it is
                # a measure and register a method with an altered signature 
                # ("Partition" is replaced by "vector") and wrap the call to the 'original'
                # method.
                if ((length(method_signature) == 2) & 
                    method_signature[1] == "Partition" & 
                    method_signature[2] == "Partition") {
                  result.df[nrow(result.df)+1,] <- list(method_name, 
                                                        do.call(method_name, list(p, q)))
                } else if ((length(method_signature) > 2) & 
                           method_signature[1] == "Partition" & 
                           method_signature[2] == "Partition" &
                           all(method_signature[3:length(method_signature)] == "missing")) {
                  result.df[nrow(result.df)+1,] <- list(method_name, 
                                                        do.call(method_name, list(p, q)))
                }
              }
            }
            
            result.df
          })

Any scripts or data that you put into this service are public.

partitionComparison documentation built on Aug. 24, 2023, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

partitionComparison
Implements Measures for the Comparison of Two Partitions

R/aab-helper.R
In partitionComparison: Implements Measures for the Comparison of Two Partitions

Defines functions registerPartitionVectorSignatures .createProxyFunction projectionNumber setOverlap

Documented in projectionNumber registerPartitionVectorSignatures

Try the partitionComparison package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

partitionComparison Implements Measures for the Comparison of Two Partitions

R/aab-helper.R In partitionComparison: Implements Measures for the Comparison of Two Partitions

Defines functions registerPartitionVectorSignatures .createProxyFunction projectionNumber setOverlap

Documented in projectionNumber registerPartitionVectorSignatures

Try the partitionComparison package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

partitionComparison
Implements Measures for the Comparison of Two Partitions

R/aab-helper.R
In partitionComparison: Implements Measures for the Comparison of Two Partitions