R/zzz-step-vpd-persistence-landscape.R

Defines functions tunable.step_vpd_persistence_landscape tidy.step_vpd_persistence_landscape required_pkgs.step_vpd_persistence_landscape print.step_vpd_persistence_landscape bake.step_vpd_persistence_landscape prep.step_vpd_persistence_landscape step_vpd_persistence_landscape_new step_vpd_persistence_landscape

Documented in required_pkgs.step_vpd_persistence_landscape step_vpd_persistence_landscape tidy.step_vpd_persistence_landscape tunable.step_vpd_persistence_landscape

# ------------------------------------------------------------------------------
# Generated by 'pre-generate/generate-steps.R': do not edit by hand.
# ------------------------------------------------------------------------------

#' @title Persistence Landscape Vectorization of Persistent Homology
#' 
#' @description The function `step_vpd_persistence_landscape()` creates
#'   a _specification_ of a recipe step that will convert
#'   a list-column of 3-column matrices of persistence data
#'   to a list-column of 1-row matrices of vectorizations.
#' 

#' 
#' @template step-vpd-details
#' 
#' @section Engine:
#' 
#' The persistence landscape vectorization deploys
#' [TDAvec::computePersistenceLandscape()].
#' See there for definitions and references.
#' 
#' @section Tuning Parameters:
#' 
#' This step has 4 tuning parameters:
#' \itemize{
#'   \item `hom_degree`: Homological degree (type: integer, default: `0L`)
#'   \item `num_levels`: Number of levels or envelopes (type: integer, default: `6L`)
#'   \item `weight_func_pl`: Kernel distance weight function (type: character, default: `"triangle"`)
#'   \item `bandwidth`: Kernel bandwidth (type: double, default: `NULL`)
#' }
#' 
#' @param hom_degree
#'   The homological degree of the features to be transformed.
#' @param xseq
#'   A discretization grid, as an increasing numeric vector.
#'   `xseq` overrides the other `x*` parameters with a warning.
#' @param xmin,xmax,xlen,xby
#'   Limits and resolution of a discretization grid;
#'   specify only one of `xlen` and `xby`.
#' @param num_levels
#'   The number of levels of a persistence landscape to vectorize.
#'   If `num_levels` is greater than the length of a landscape,
#'   then additional levels of zeros will be included.
#' @param generalized
#'   Logical indicator to compute generalized functions.
#' @param weight_func_pl
#'   A _single_ character string naming the type of kernel function
#'   used to compute generalized landscapes.
#' @param bandwidth
#'   The bandwidth of a kernel function.

#' @import recipes
#' @inheritParams recipes::step_pca
#' @inherit recipes::step_pca return
#' @example inst/examples/zzz-ex-step-vpd-persistence-landscape.R

#' @export
step_vpd_persistence_landscape <- function(
    recipe,
    ...,
    role = "predictor",
    trained = FALSE,
    hom_degree = 0L,
    xseq = NULL, xmin = NULL, xmax = NULL, xlen = NULL, xby = NULL,
    num_levels = 6L,
    generalized = FALSE,
    weight_func_pl = "triangle",
    bandwidth = NULL,
    columns = NULL,
    keep_original_cols = TRUE,
    skip = FALSE,
    id = rand_id("vpd_persistence_landscape")
) {
  # Check the packages this step declares via its `required_pkgs()` method
  # before anything is added to the recipe.
  recipes_pkg_check(required_pkgs.step_vpd_persistence_landscape())

  # Capture the column selectors in `...` as quosures and bundle them with
  # every option, unchanged, into an untrained step object.
  landscape_step <- step_vpd_persistence_landscape_new(
    terms = rlang::enquos(...),
    trained = trained,
    role = role,
    hom_degree = hom_degree,
    xseq = xseq,
    xmin = xmin,
    xmax = xmax,
    xlen = xlen,
    xby = xby,
    num_levels = num_levels,
    generalized = generalized,
    weight_func_pl = weight_func_pl,
    bandwidth = bandwidth,
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )

  # Append the step to the recipe and return the updated recipe.
  add_step(recipe, landscape_step)
}

step_vpd_persistence_landscape_new <- function(
    terms,
    role, trained,
    hom_degree,
    xseq, xmin, xmax, xlen, xby,
    num_levels,
    generalized,
    weight_func_pl,
    bandwidth,
    columns, keep_original_cols,
    skip, id
) {
  # Low-level constructor: forwards every field verbatim to `step()` under the
  # subclass "vpd_persistence_landscape". No validation is performed here.
  # The field order below is kept as-is because `step()` receives the fields
  # in this order.
  step(
    subclass = "vpd_persistence_landscape",
    terms = terms,
    role = role,
    trained = trained,
    hom_degree = hom_degree,
    xseq = xseq,
    xmin = xmin,
    xmax = xmax,
    xlen = xlen,
    xby = xby,
    num_levels = num_levels,
    generalized = generalized,
    weight_func_pl = weight_func_pl,
    bandwidth = bandwidth,
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )
}

#' @export
prep.step_vpd_persistence_landscape <- function(x, training, info = NULL, ...) {
  # Train the step on `training`:
  #   1. resolve the selected columns and check them with `check_pd_list()`;
  #   2. settle the discretization grid (`xseq`, `xmin`, `xmax`, `xlen`, `xby`);
  #   3. make `generalized` agree with `bandwidth`;
  #   4. for the affected engine versions, cap `num_levels`.
  # Returns a new step object flagged `trained = TRUE`.
  col_names <- recipes_eval_select(x$terms, training, info)
  check_pd_list(training[, col_names, drop = FALSE])
  # Reset the class of each selected column to a plain list.
  for (col_name in col_names) class(training[[col_name]]) <- "list"

  x[paste0("x", c("seq", "min", "max", "len", "by"))] <-
    reconcile_scale_seq(x, training[, col_names, drop = FALSE], "x")

  # `bandwidth` decides whether generalized landscapes are computed; a
  # conflicting `generalized` is overridden with a warning.
  if (is.null(x$bandwidth)) {
    if (!isFALSE(x$generalized)) {
      warning("`bandwidth` is `NULL` so `generalized` is set to `FALSE`.")
    }
    x$generalized <- FALSE
  } else {
    if (!isTRUE(x$generalized)) {
      warning("`bandwidth` is provided so `generalized` is set to `TRUE`.")
    }
    x$generalized <- TRUE
  }

  # Only applied when the engine version is unknown or exactly "0.1.4".
  # NOTE(review): presumably that release cannot handle `num_levels` larger
  # than a diagram's size -- confirm against the TDAvec changelog.
  if (is.na(.TDAvec_version) || .TDAvec_version == "0.1.4") {
    # One minimum per selected column (non-finite per-diagram values ignored).
    x_pairs_min <- vapply(
      training[, col_names, drop = FALSE],
      function(l) {
        val <- vapply(l, pairs_min, 0, hom_degree = x$hom_degree)
        min(val[is.finite(val)])
      },
      0
    )
    # Collapse to a single scalar across columns: `if ()` needs a length-one
    # condition, and the vector has length != 1 whenever the number of
    # selected columns is not exactly one.
    x_pairs_min <- x_pairs_min[is.finite(x_pairs_min)]
    if (length(x_pairs_min) > 0L) {
      x_pairs_min <- min(x_pairs_min)
      if (x$num_levels > x_pairs_min) {
        warning(
          "`num_levels = ", x$num_levels,
          "` is greater than minimum diagram size ",
          "so will be reset to ", x_pairs_min
        )
        x$num_levels <- x_pairs_min
      }
    }
  }

  step_vpd_persistence_landscape_new(
    terms = col_names,
    role = x$role,
    trained = TRUE,
    hom_degree = x$hom_degree,
    xseq = x$xseq, xmin = x$xmin, xmax = x$xmax, xlen = x$xlen, xby = x$xby,
    num_levels = x$num_levels,
    generalized = x$generalized,
    weight_func_pl = x$weight_func_pl,
    bandwidth = x$bandwidth,
    columns = col_names,
    keep_original_cols = get_keep_original_cols(x),
    skip = x$skip,
    id = x$id
  )
}

#' @export
bake.step_vpd_persistence_landscape <- function(object, new_data, ...) {
  # Apply the trained step: vectorize every selected persistence-data column
  # with the engine and bind the resulting feature columns onto `new_data`.
  col_names <- names(object$columns)
  check_new_data(col_names, object, new_data)
  for (col_name in col_names) class(new_data[[col_name]]) <- "list"

  # Turn one diagram into a one-row data frame of named landscape values.
  vectorize_diagram <- function(d) {
    landscape <- TDAvec::computePersistenceLandscape(
      as.matrix(d),
      homDim = object$hom_degree,
      scaleSeq = object$xseq,
      k = object$num_levels,
      generalized = object$generalized,
      kernel = object$weight_func_pl,
      h = object$bandwidth
    )
    # Names are derived from the engine output before it is flattened.
    value_names <- vpd_suffix(landscape)
    values <- as.vector(landscape)
    names(values) <- value_names
    as.data.frame(matrix(
      values, nrow = 1L, dimnames = list(NULL, names(values))
    ))
  }

  # Start from a column-less tibble with one row per observation and add one
  # list-column of one-row data frames per selected column.
  vph_data <- tibble::tibble(.rows = nrow(new_data))
  for (col_name in col_names) {
    out_name <- paste(col_name, "pl", sep = "_")
    vph_data[[out_name]] <- purrr::map(new_data[[col_name]], vectorize_diagram)
  }

  # Spread the list-columns into ordinary columns named
  # "<column>_pl_<feature>".
  out_names <- paste(col_names, "pl", sep = "_")
  vph_data <- tidyr::unnest(
    vph_data,
    cols = tidyr::all_of(out_names),
    names_sep = "_"
  )

  check_name(vph_data, new_data, object)
  new_data <- vctrs::vec_cbind(new_data, vph_data)
  remove_original_cols(new_data, object, col_names)
}

#' @export
print.step_vpd_persistence_landscape <- function(
    x, width = max(20, options()$width - 35), ...
) {
  # Print a one-line summary of the step and return `x` invisibly.
  title <- "persistence landscape of "

  # Hand `print_step()` both views of the selection: the untrained selectors
  # in `x$terms` and, once prepped, the resolved column names in `x$columns`
  # (unnamed, as in the `tidy()` method). Previously `tr_obj` was always
  # `NULL`, so a trained step had no column names to display.
  print_step(
    tr_obj = unname(x$columns),
    untr_obj = x$terms,
    trained = x$trained,
    title = title,
    width = width
  )
  invisible(x)
}

#' @rdname required_pkgs.tdarec
#' @export
required_pkgs.step_vpd_persistence_landscape <- function(x, ...) {
  # Packages declared as required by this step; `x` is not inspected.
  step_pkgs <- c("TDAvec", "tdarec")
  step_pkgs
}

#' @rdname step_vpd_persistence_landscape
#' @usage NULL
#' @export
tidy.step_vpd_persistence_landscape <- function(x, ...) {
  # Summarize the step as a tibble with one row per term: the resolved column
  # names once trained, otherwise the selectors rendered as text. `value` is
  # always `NA`, and the step `id` is attached as a column.
  term_names <- if (is_trained(x)) {
    unname(x$columns)
  } else {
    sel2char(x$terms)
  }

  res <- tibble::tibble(
    terms = term_names,
    value = rep(NA_real_, length(term_names))
  )
  res$id <- x$id
  res
}

#' @rdname tunable_tdavec
#' @export
tunable.step_vpd_persistence_landscape <- function(x, ...) {
  # Describe the tunable arguments of this step: one row per parameter, each
  # paired with the package/function that supplies its parameter object and
  # the range or value set to use.
  param_names <- c("hom_degree", "num_levels", "weight_func_pl", "bandwidth")

  param_calls <- list(
    list(pkg = "tdarec", fun = "hom_degree", range = c(0L, unknown())),
    list(pkg = "tdarec", fun = "num_levels", range = c(1L, unknown())),
    list(
      pkg = "tdarec",
      fun = "weight_func_pl",
      values = c("triangle", "epanechnikov", "tricubic")
    ),
    list(pkg = "tdarec", fun = "bandwidth", range = c(unknown(), unknown()))
  )

  tibble::tibble(
    name = param_names,
    call_info = param_calls,
    source = "recipe",
    component = "step_vpd_persistence_landscape",
    component_id = x$id
  )
}

Try the tdarec package in your browser

Any scripts or data that you put into this service are public.

tdarec documentation built on June 8, 2025, 10:41 a.m.