R/representation-vector-methods.R
In rgudhi: An Interface to the GUDHI Library for Topological Data Analysis

#' Vector Representation Step
#'
#' @param diag A 2-column [tibble::tibble] specifying a persistence diagram.
#' @param X A list of 2-column [tibble::tibble]s specifying a sample of
#'   persistence diagrams.
#' @param y An integer vector specifying persistence diagram labels (unused for
#'   now).
#'
#' @author Mathieu Carrière
#' @keywords internal
VectorRepresentationStep <- R6::R6Class(
  classname = "VectorRepresentationStep",
  inherit = SKLearnClass,
  public = list(
    #' @description Applies the class on a single persistence diagram and
    #'   outputs the result.
    #'
    #' @return A [tibble::tibble] storing the requested vector representation of
    #'   the persistence diagram in a table suitable for visualization.
    apply = function(diag) {
      diag |>
        as.matrix() |>
        super$apply() |>
        private$convert_output()
    },

    #' @description Fits the class on a sample of persistence diagrams.
    #'
    #' @return The class itself invisibly.
    fit = function(X, y = NULL) {
      X |>
        purrr::map(as.matrix) |>
        super$fit(y)
      invisible(self)
    },

    #' @description Applies the class on a sample of persistence diagrams.
    #'
    #' @return A list of [tibble::tibble]s storing the requested vector
    #'   representations of the persistence diagrams in a table suitable for
    #'   visualization.
    transform = function(X) {
      X |>
        purrr::map(as.matrix) |>
        super$transform() |>
        private$convert_output()
    },

    #' @description Applies sequentially the `$fit()` and `$transform()` methods
    #'   on a sample of persistence diagrams in a more efficient way than
    #'   calling them directly.
    #'
    #' @return A list of [tibble::tibble]s storing the requested vector
    #'   representations of the persistence diagrams in a table suitable for
    #'   visualization.
    fit_transform = function(X, y = NULL) {
      X <- purrr::map(X, as.matrix)
      super$fit(X, y)
      X |>
        super$transform() |>
        private$convert_output()
    }
  ),
  private = list(
    convert_output = function(x) {
      if (is.list(x)) purrr::map(x, private$convert_output)
      repr_values <- if (length(dim(x)) == 1) x else purrr::array_tree(x, margin = 2)
      py_cls <- super$get_python_class()
      if ("grid_" %in% names(py_cls))
        grid <- as.numeric(py_cls$grid_)
      else if ("sample_range" %in% names(py_cls) && !anyNA(py_cls$sample_range))
        grid <- seq(
          from = py_cls$sample_range[1],
          to = py_cls$sample_range[2],
          length.out = py_cls$resolution
        )
      else if ("im_range" %in% names(py_cls))
        grid <- purrr::cross_df(list(
          X = seq(
            from = py_cls$im_range[1],
            to = py_cls$im_range[3],
            length.out = py_cls$resolution[1]
          ),
          Y = seq(
            from = py_cls$im_range[2],
            to = py_cls$im_range[4],
            length.out = py_cls$resolution[2]
          )
        ))
      else
        grid <- 1:length(repr_values)

      if ("num_landscapes" %in% names(py_cls)) {
        num_landscapes <- py_cls$num_landscapes
        out <- tibble::tibble(
          Grid = rep(grid, times = num_landscapes),
          LandscapeId = rep(1:num_landscapes, each = py_cls$resolution),
          Value = repr_values
        )
      }
      else if ("im_range" %in% names(py_cls)) {
        out <- grid
        out$Value <- repr_values
      }
      else
        out <- tibble::tibble(Grid = grid, Value = repr_values)
      out
    }
  )
)

#' Vector Representation: Atol
#'
#' @description Computes measure vectorization (e.g. point clouds, persistence
#'   diagrams, etc.) after a quantisation step according to the Atol algorithm
#'   \insertCite{royer2021atol}{rgudhi}.
#'
#' ## References
#' \insertCited{}
#'
#' @author Mathieu Carrière
#'
#' @export
Atol <- R6::R6Class(
  classname = "Atol",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`Atol`] constructor.
    #'
    #' @param quantiser An object of class [`BaseClustering`] specifying
    #'   any clustering algorithm from the
    #'   [**sklearn.cluster**](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.cluster)
    #'   module. It will be fitted when the `$fit()` method is called.
    #' @param weighting_method A string specifying the constant generic function
    #'   for weighting the measure points. Choices are either `"cloud"` or
    #'   `"iidproba"`. Defaults to `"cloud"`, i.e. the measure is seen as a
    #'   point cloud. This will have no impact if weights are provided along
    #'   with measures all the way, i.e. at `$fit()` and `$transform()` calls,
    #'   through the optional argument `sample_weight`.
    #' @param contrast A string specifying the constant function for evaluating
    #'   proximity of a measure with respect to centers. Choices are either
    #'   `"gaussian"` or `"laplacian"` or `"indicator"`. Defaults to
    #'   `"gaussian"` (see page 3 in
    #'   \insertCite{royer2021atol;textual}{rgudhi}).
    #'
    #' @return An object of class [`Atol`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' km <- KMeans$new(n_clusters = 2, random_state = 202006)
    #' vr <- Atol$new(quantiser = km)
    #' # vr$apply(dgm) # TODO: needs fix in python
    #' vr$fit_transform(list(dgm))
    initialize = function(quantiser,
                          weighting_method = c("cloud", "iidproba"),
                          contrast = c("gaussian", "laplacian", "indicator")) {
      quantiser <- quantiser$get_python_class()
      weighting_method <- rlang::arg_match(weighting_method)
      contrast <- rlang::arg_match(contrast)
      super$set_python_class(
        gd$representations$Atol(
          quantiser = quantiser,
          weighting_method = weighting_method,
          contrast = contrast
        )
      )
    }
  )
)

#' Vector Representation: Betti Curve
#'
#' @description Computes Betti curves from persistence diagrams. There are
#'   several modes of operation: with a given resolution (with or without a
#'   `sample_range`), with a predefined grid, and with none of the previous.
#'   With a predefined grid, the class computes the Betti numbers at those grid
#'   points. Without a predefined grid, if the resolution is set to `NULL`, it
#'   can be fit to a list of persistence diagrams and produce a grid that
#'   consists of (at least) the filtration values at which at least one of those
#'   persistence diagrams changes Betti numbers, and then compute the Betti
#'   numbers at those grid points. In the latter mode, the exact Betti curve is
#'   computed for the entire real line. Otherwise, if the resolution is given,
#'   the Betti curve is obtained by sampling evenly using either the given
#'   `sample_range` or based on the persistence diagrams.
#'
#' @author Mathieu Carrière
#' @export
BettiCurve <- R6::R6Class(
  classname = "BettiCurve",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`BettiCurve`] constructor.
    #'
    #' @param resolution An integer value specifying the number of sample for
    #'   the piecewise constant function. Defaults to `100L`.
    #' @param sample_range A length-2 numeric vector specifying the minimum and
    #'   maximum of the piecewise constant function domain, of the form
    #'   \eqn{[x_{\min}, x_{\max}]}. Defaults to `rep(NA, 2)`. It is the
    #'   interval on which samples will be drawn evenly. If one of the values is
    #'   `NA`, it can be computed from the persistence diagrams with the
    #'   `$fit()` method.
    #' @param predefined_grid A numeric vector specifying a predefined grid of
    #'   points at which to compute the Betti curves. Must be strictly ordered.
    #'   Infinities are ok. If set to `NULL` (default), and resolution is given,
    #'   the grid will be uniform from \eqn{x_{\min}} to \eqn{x_{\max}} in
    #'   `resolution` steps, otherwise a grid will be computed that captures all
    #'   changes in Betti numbers in the provided data.
    #'
    #' @return An object of class [`BettiCurve`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' bc <- BettiCurve$new()
    #' bc$apply(dgm)
    #' bc$fit_transform(list(dgm))
    initialize = function(resolution = 100,
                          sample_range = rep(NA, 2),
                          predefined_grid = NULL) {
      resolution <- as.integer(resolution)
      super$set_python_class(
        gd$representations$BettiCurve(
          resolution = resolution,
          sample_range = sample_range,
          predefined_grid = predefined_grid
        )
      )
    }
  )
)

#' Vector Representation: Complex Polynomial
#'
#' @description Computes complex polynomials from a list of persistence
#'   diagrams. The persistence diagram points are seen as the roots of some
#'   complex polynomial, whose coefficients are returned in a complex vector.
#'   See https://link.springer.com/chapter/10.1007%2F978-3-319-23231-7_27 for
#'   more details.
#'
#' @author Mathieu Carrière
#' @export
ComplexPolynomial <- R6::R6Class(
  classname = "ComplexPolynomial",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`ComplexPolynomial`] constructor.
    #'
    #' @param polynomial_type A string specifying the Type of complex polynomial
    #'   that is going to be computed (explained in
    #'   https://link.springer.com/chapter/10.1007%2F978-3-319-23231-7_27).
    #'   Choices are `c("R", "S", "T")`. Defaults to `"R"`.
    #' @param threshold An integer value specifying the number of coefficients.
    #'   This is the dimension of the complex vector of coefficients, i.e. the
    #'   number of coefficients corresponding to the largest degree terms of the
    #'   polynomial. If `-1`, this threshold is computed from the list of
    #'   persistence diagrams by considering the one with the largest number of
    #'   points and using the dimension of its corresponding complex vector of
    #'   coefficients as threshold. Defaults to `10L`.
    #'
    #' @return An object of class [`ComplexPolynomial`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' cp <- ComplexPolynomial$new()
    #' cp$apply(dgm)
    #' cp$fit_transform(list(dgm))
    initialize = function(polynomial_type = c("R", "S", "T"), threshold = 10) {
      polynomial_type <- rlang::arg_match(polynomial_type)
      threshold <- as.integer(threshold)
      super$set_python_class(
        gd$representations$ComplexPolynomial(
          polynomial_type = polynomial_type,
          threshold = threshold
        )
      )
    }
  )
)

#' Vector Representation: Entropy
#'
#' @description Computes persistence entropy. Persistence entropy is a statistic
#'   for persistence diagrams inspired from Shannon entropy. This statistic can
#'   also be used to compute a feature vector, called the entropy summary
#'   function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note
#'   that a previous implementation was contributed by Manuel Soriano-Trigueros.
#'
#' @author Mathieu Carrière
#' @export
Entropy <- R6::R6Class(
  classname = "Entropy",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`Entropy`] constructor.
    #'
    #' @param mode A string specifying which entropy to compute: either
    #'   `"scalar"` for computing the entropy statistic, or `"vector"` for
    #'   computing the entropy summary function. Defaults to `"scalar"`.
    #' @param normalized A boolean value specifying whether to normalize the
    #'   entropy summary function. Defaults to `TRUE`. Used only if `mode ==
    #'   "vector"`.
    #' @param resolution An integer value specifying the grid size for the
    #'   entropy summary function. Defaults to `100L`. Used only if `mode ==
    #'   "vector"`.
    #' @param sample_range A length-2 numeric vector specifying the domain for
    #'   the entropy summary function, of the form \eqn{[x_{\min}, x_{\max}]}.
    #'   Defaults to `rep(NA, 2)`. It is the interval on which samples will be
    #'   drawn evenly. If one of the values is `NA`, it can be computed from the
    #'   persistence diagrams with the `$fit()` method. Used only if `mode ==
    #'   "vector"`.
    #'
    #' @return An object of class [`Entropy`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' ent <- Entropy$new()
    #' ent$apply(dgm)
    #' ent$fit_transform(list(dgm))
    initialize = function(mode = c("scalar", "vector"),
                          normalized = TRUE,
                          resolution = 100,
                          sample_range = rep(NA_real_, 2)) {
      mode <- rlang::arg_match(mode)
      resolution <- as.integer(resolution)
      super$set_python_class(
        gd$representations$Entropy(
          mode = mode,
          normalized = normalized,
          resolution = resolution,
          sample_range = sample_range
        )
      )
    }
  )
)

#' Vector Representation: Landscape
#'
#' @description Computes persistence landscapes from a list of persistence
#'   diagrams. A persistence landscape is a collection of 1D piecewise-linear
#'   functions computed from the rank function associated to the persistence
#'   diagram. These piecewise-linear functions are then sampled evenly on a
#'   given range and the corresponding vectors of samples are concatenated and
#'   returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
#'
#' @author Mathieu Carrière
#' @export
Landscape <- R6::R6Class(
  classname = "Landscape",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`Landscape`] constructor.
    #'
    #' @param num_landscapes An integer value specifying the number of piecewise
    #'   linear functions to output. Defaults to `5L`.
    #' @param resolution An integer value specifying the grid size for the
    #'   landscapes. Defaults to `100L`.
    #' @param sample_range A length-2 numeric vector specifying the domain for
    #'   the entropy summary function, of the form \eqn{[x_{\min}, x_{\max}]}.
    #'   Defaults to `rep(NA, 2)`. It is the interval on which samples will be
    #'   drawn evenly. If one of the values is `NA`, it can be computed from the
    #'   persistence diagrams with the `$fit()` method.
    #'
    #' @return An object of class [`Landscape`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' lds <- Landscape$new()
    #' lds$apply(dgm)
    #' lds$fit_transform(list(dgm))
    initialize = function(num_landscapes = 5,
                          resolution = 100,
                          sample_range = rep(NA_real_, 2)) {
      num_landscapes <- as.integer(num_landscapes)
      resolution <- as.integer(resolution)
      super$set_python_class(
        gd$representations$Landscape(
          num_landscapes = num_landscapes,
          resolution = resolution,
          sample_range = sample_range
        )
      )
    }
  )
)

#' Vector Representation: Persistence Image
#'
#' @description Computes persistence images from a list of persistence diagrams.
#'   A persistence image is a 2D function computed from a persistence diagram by
#'   convolving the diagram points with a weighted Gaussian kernel. The plane is
#'   then discretized into an image with pixels, which is flattened and returned
#'   as a vector. See http://jmlr.org/papers/v18/16-337.html for more details.
#'
#' @author Mathieu Carrière
#' @export
PersistenceImage <- R6::R6Class(
  classname = "PersistenceImage",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`PersistenceImage`] constructor.
    #'
    #' @param bandwidth A numeric value specifying the bandwidth of the Gaussian
    #'   kernel. Defaults to `1.0`.
    #' @param weight A function or a formula coercible into a function via
    #'   [rlang::as_function()] specifying the weight function for the
    #'   persistence diagram points. Defaults to the constant function `~ 1`.
    #'   This function must be defined on 2D points, i.e. lists or arrays of the
    #'   form \eqn{[p_x,p_y]}.
    #' @param resolution An length-1 integer vector specifying the size (in
    #'   pixels) of the persistence image. Defaults to `rep(20L, 2)`.
    #' @param im_range A length-4 numeric vector specifying the two-dimensional
    #'   domain for the persistence image, of the form \eqn{[x_{\min}, y_{\min},
    #'   x_{\max}, y_{\max}]}. Defaults to `rep(NA, 4)`. If one of the values is
    #'   `NA`, it can be computed from the persistence diagrams with the
    #'   `$fit()` method.
    #'
    #' @return An object of class [`PersistenceImage`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' pei <- PersistenceImage$new()
    #' pei$apply(dgm)
    #' pei$fit_transform(list(dgm))
    initialize = function(bandwidth = 1.0,
                          weight = ~ 1,
                          resolution = c(20, 20),
                          im_range = rep(NA_real_, 4)) {
      weight <- rlang::as_function(weight)
      resolution <- as.integer(resolution)
      super$set_python_class(
        gd$representations$PersistenceImage(
          bandwidth = bandwidth,
          weight = weight,
          resolution = resolution,
          im_range = im_range
        )
      )
    }
  )
)

#' Vector Representation: Silhouette
#'
#' @description Computes persistence silhouettes from a list of persistence
#'   diagrams. A persistence silhouette is computed by taking a weighted average
#'   of the collection of 1D piecewise-linear functions given by the persistence
#'   landscapes, and then by evenly sampling this average on a given range.
#'   Finally, the corresponding vector of samples is returned. See
#'   https://arxiv.org/abs/1312.0308 for more details.
#'
#' @author Mathieu Carrière
#' @export
Silhouette <- R6::R6Class(
  classname = "Silhouette",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`Silhouette`] constructor.
    #'
    #' @param weight A function or a formula coercible into a function via
    #'   [rlang::as_function()] specifying the weight function for the
    #'   persistence diagram points. Defaults to the constant function `~ 1`.
    #'   This function must be defined on 2D points, i.e. lists or arrays of the
    #'   form \eqn{[p_x,p_y]}.
    #' @param resolution An length-1 integer vector specifying the size (in
    #'   pixels) of the persistence image. Defaults to `rep(20L, 2)`.
    #' @param sample_range A length-2 numeric vector specifying the domain for
    #'   the entropy summary function, of the form \eqn{[x_{\min}, x_{\max}]}.
    #'   Defaults to `rep(NA, 2)`. It is the interval on which samples will be
    #'   drawn evenly. If one of the values is `NA`, it can be computed from the
    #'   persistence diagrams with the `$fit()` method.
    #'
    #' @return An object of class [`Silhouette`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' sil <- Silhouette$new()
    #' sil$apply(dgm) # TO DO: fix gd because it does not set sample_range automatically
    #' sil$fit_transform(list(dgm))
    initialize = function(weight = ~ 1,
                          resolution = 100,
                          sample_range = rep(NA_real_, 2)) {
      weight <- rlang::as_function(weight)
      resolution <- as.integer(resolution)
      super$set_python_class(
        gd$representations$Silhouette(
          weight = weight,
          resolution = resolution,
          sample_range = sample_range
        )
      )
    }
  )
)

#' Vector Representation: Topological Vector
#'
#' @description Computes topological vectors from a list of persistence
#'   diagrams. The topological vector associated to a persistence diagram is the
#'   sorted vector of a slight modification of the pairwise distances between
#'   the persistence diagram points. See
#'   https://diglib.eg.org/handle/10.1111/cgf12692 for more details.
#'
#' @author Mathieu Carrière
#' @export
TopologicalVector <- R6::R6Class(
  classname = "TopologicalVector",
  inherit = VectorRepresentationStep,
  public = list(
    #' @description The [`TopologicalVector`] constructor.
    #'
    #' @param threshold An integer value specifying the number of distances to
    #'   keep. Defaults to `10L`. This is the dimension of the topological
    #'   vector. If `-1`, this threshold is computed from the list of
    #'   persistence diagrams by considering the one with the largest number of
    #'   points and using the dimension of its corresponding topological vector
    #'   as threshold.
    #'
    #' @return An object of class [`TopologicalVector`].
    #'
    #' @examplesIf reticulate::py_module_available("gudhi")
    #' X <- seq_circle(10)
    #' ac <- AlphaComplex$new(points = X)
    #' st <- ac$create_simplex_tree()
    #' dgm <- st$compute_persistence()$persistence_intervals_in_dimension(0)
    #' ds <- DiagramSelector$new(use = TRUE)
    #' dgm <- ds$apply(dgm)
    #' tv <- TopologicalVector$new()
    #' tv$apply(dgm)
    #' tv$fit_transform(list(dgm))
    initialize = function(threshold = 10) {
      threshold <- as.integer(threshold)
      super$set_python_class(
        gd$representations$TopologicalVector(
          threshold = threshold
        )
      )
    }
  )
)
Any scripts or data that you put into this service are public.
rgudhi documentation built on March 31, 2023, 11:38 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rgudhi
An Interface to the GUDHI Library for Topological Data Analysis

R/representation-vector-methods.R
In rgudhi: An Interface to the GUDHI Library for Topological Data Analysis

Try the rgudhi package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rgudhi An Interface to the GUDHI Library for Topological Data Analysis

R/representation-vector-methods.R In rgudhi: An Interface to the GUDHI Library for Topological Data Analysis

Try the rgudhi package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rgudhi
An Interface to the GUDHI Library for Topological Data Analysis

R/representation-vector-methods.R
In rgudhi: An Interface to the GUDHI Library for Topological Data Analysis