R/zzz-step-vpd-betti-curve.R

Defines functions tunable.step_vpd_betti_curve tidy.step_vpd_betti_curve required_pkgs.step_vpd_betti_curve print.step_vpd_betti_curve bake.step_vpd_betti_curve prep.step_vpd_betti_curve step_vpd_betti_curve_new step_vpd_betti_curve

Documented in required_pkgs.step_vpd_betti_curve step_vpd_betti_curve tidy.step_vpd_betti_curve tunable.step_vpd_betti_curve

# ------------------------------------------------------------------------------
# Generated by 'pre-generate/generate-steps.R': do not edit by hand.
# ------------------------------------------------------------------------------

#' @title Betti Curve Vectorization of Persistent Homology
#' 
#' @description The function `step_vpd_betti_curve()` creates
#'   a _specification_ of a recipe step that will convert
#'   a list-column of 3-column matrices of persistence data
#'   to a list-column of 1-row matrices of vectorizations.
#' 

#' 
#' @template step-vpd-details
#' 
#' @section Engine:
#' 
#' The Betti curve vectorization deploys
#' [TDAvec::computeBettiCurve()].
#' See there for definitions and references.
#' 
#' @section Tuning Parameters:
#' 
#' This step has 1 tuning parameter:
#' \itemize{
#'   \item `hom_degree`: Homological degree (type: integer, default: `0L`)
#' }
#' 
#' @param hom_degree
#'   The homological degree of the features to be transformed.
#' @param xseq
#'   A discretization grid, as an increasing numeric vector.
#'   `xseq` overrides the other `x*` parameters with a warning.
#' @param xmin,xmax,xlen,xby
#'   Limits and resolution of a discretization grid;
#'   specify only one of `xlen` and `xby`.
#' @param evaluate
#'   The method by which to vectorize continuous functions over a grid,
#'   either 'intervals' or 'points'.
#'   Some functions only admit one method.

#' @import recipes
#' @inheritParams recipes::step_pca
#' @inherit recipes::step_pca return
#' @example inst/examples/zzz-ex-step-vpd-betti-curve.R

#' @export
step_vpd_betti_curve <- function(
    recipe,
    ...,
    role = "predictor",
    trained = FALSE,
    hom_degree = 0L,
    xseq = NULL, xmin = NULL, xmax = NULL, xlen = NULL, xby = NULL,
    evaluate = "intervals",
    columns = NULL,
    keep_original_cols = TRUE,
    skip = FALSE,
    id = rand_id("vpd_betti_curve")
) {
  # Check the packages this step declares via its `required_pkgs()` method.
  recipes_pkg_check(required_pkgs.step_vpd_betti_curve())

  # Build the (untrained) step object. The selectors in `...` are captured
  # as quosures here; `prep()` is where they get resolved to column names.
  betti_step <- step_vpd_betti_curve_new(
    terms = rlang::enquos(...),
    trained = trained,
    role = role,
    hom_degree = hom_degree,
    xseq = xseq,
    xmin = xmin,
    xmax = xmax,
    xlen = xlen,
    xby = xby,
    evaluate = evaluate,
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )

  # Append the step to the recipe and return the updated recipe.
  add_step(recipe, betti_step)
}

step_vpd_betti_curve_new <- function(
    terms,
    role, trained,
    hom_degree,
    xseq, xmin, xmax, xlen, xby,
    evaluate,
    columns, keep_original_cols,
    skip, id
) {
  # Low-level constructor: forwards every field, unvalidated, to
  # `step()` under the subclass name "vpd_betti_curve".
  step(
    subclass = "vpd_betti_curve",
    # selection and bookkeeping
    terms = terms,
    role = role,
    trained = trained,
    # vectorization settings
    hom_degree = hom_degree,
    xseq = xseq,
    xmin = xmin,
    xmax = xmax,
    xlen = xlen,
    xby = xby,
    evaluate = evaluate,
    # resolved columns and post-processing options
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )
}

#' @export
prep.step_vpd_betti_curve <- function(x, training, info = NULL, ...) {
  # Resolve the step's selectors against the training data.
  selected <- recipes_eval_select(x$terms, training, info)

  # Validate the selected columns first, then overwrite their class with
  # plain "list" for the remaining processing.
  check_pd_list(training[, selected, drop = FALSE])
  for (pd_col in selected) {
    class(training[[pd_col]]) <- "list"
  }

  # Replace the grid fields (xseq, xmin, xmax, xlen, xby) with the values
  # returned by `reconcile_scale_seq()` for the selected training columns.
  grid_fields <- paste0("x", c("seq", "min", "max", "len", "by"))
  x[grid_fields] <- reconcile_scale_seq(
    x,
    training[, selected, drop = FALSE],
    "x"
  )

  # Rebuild the step as trained, recording the resolved column names.
  step_vpd_betti_curve_new(
    terms = selected,
    role = x$role,
    trained = TRUE,
    hom_degree = x$hom_degree,
    xseq = x$xseq,
    xmin = x$xmin,
    xmax = x$xmax,
    xlen = x$xlen,
    xby = x$xby,
    evaluate = x$evaluate,
    columns = selected,
    keep_original_cols = get_keep_original_cols(x),
    skip = x$skip,
    id = x$id
  )
}

#' @export
bake.step_vpd_betti_curve <- function(object, new_data, ...) {
  pd_cols <- names(object$columns)
  check_new_data(pd_cols, object, new_data)
  for (pd_col in pd_cols) {
    class(new_data[[pd_col]]) <- "list"
  }

  # Names of the intermediate list-columns, one per input column.
  bc_cols <- paste(pd_cols, "bc", sep = "_")

  # Vectorize a single persistence data set into a 1-row data frame whose
  # column names come from `vpd_suffix()`.
  betti_row <- function(d) {
    curve <- TDAvec::computeBettiCurve(
      as.matrix(d),
      homDim = object$hom_degree,
      scaleSeq = object$xseq,
      evaluate = object$evaluate
    )
    labels <- vpd_suffix(curve)
    values <- as.vector(curve)
    names(values) <- labels
    as.data.frame(
      matrix(values, nrow = 1L, dimnames = list(NULL, names(values)))
    )
  }

  # One list-column of 1-row data frames per selected input column.
  features <- tibble::tibble(.rows = nrow(new_data))
  for (i in seq_along(pd_cols)) {
    features[[bc_cols[[i]]]] <- purrr::map(new_data[[pd_cols[[i]]]], betti_row)
  }

  # Spread each list-column into ordinary columns named
  # `<column>_bc_<suffix>`.
  features <- tidyr::unnest(
    features,
    cols = tidyr::all_of(bc_cols),
    names_sep = "_"
  )

  # Check the new names against the existing data, then bind the features
  # and apply the step's `keep_original_cols` handling.
  check_name(features, new_data, object)
  baked <- vctrs::vec_cbind(new_data, features)
  remove_original_cols(baked, object, pd_cols)
}

#' @export
print.step_vpd_betti_curve <- function(
    x, width = max(20, options()$width - 35), ...
) {
  # Print a one-line summary of the step.
  #
  # `x` is the step object, `width` the maximum line width handed to
  # `print_step()`, and `...` is unused. Returns `x` invisibly.
  title <- "Betti curve of "

  # `print_step()` displays `tr_obj` once the step is trained and
  # `untr_obj` otherwise. Passing the resolved columns as `tr_obj` lets a
  # prepped step list its columns instead of printing none.
  print_step(
    untr_obj = x$terms,
    tr_obj = x$columns,
    trained = x$trained,
    title = title,
    width = width
  )
  invisible(x)
}

#' @rdname required_pkgs.tdarec
#' @export
required_pkgs.step_vpd_betti_curve <- function(x, ...) {
  # Packages this step declares as required; `x` and `...` are unused.
  pkgs <- c("TDAvec", "tdarec")
  pkgs
}

#' @rdname step_vpd_betti_curve
#' @usage NULL
#' @export
tidy.step_vpd_betti_curve <- function(x, ...) {
  # Trained steps report the resolved column names; untrained steps report
  # the selectors rendered as character strings.
  term_names <- if (is_trained(x)) {
    unname(x$columns)
  } else {
    sel2char(x$terms)
  }

  # The `value` column is always filled with `NA`.
  summary_tbl <- tibble::tibble(
    terms = term_names,
    value = rep(NA_real_, length(term_names))
  )
  summary_tbl$id <- x$id
  summary_tbl
}

#' @rdname tunable_tdavec
#' @export
tunable.step_vpd_betti_curve <- function(x, ...) {
  # Parameter object for `hom_degree`: provided by `tdarec::hom_degree`,
  # with lower bound 0 and the upper bound given as `unknown()`.
  hom_degree_info <- list(
    pkg = "tdarec",
    fun = "hom_degree",
    range = c(0L, unknown())
  )

  # One row per tunable parameter; this step has exactly one.
  tibble::tibble(
    name = "hom_degree",
    call_info = list(hom_degree_info),
    source = "recipe",
    component = "step_vpd_betti_curve",
    component_id = x$id
  )
}

Try the tdarec package in your browser

Any scripts or data that you put into this service are public.

tdarec documentation built on June 8, 2025, 10:41 a.m.