R/step-pd-degree.R

Defines functions tidy.step_pd_degree required_pkgs.step_pd_degree print.step_pd_degree bake.step_pd_degree prep.step_pd_degree step_pd_degree_new step_pd_degree

Documented in required_pkgs.step_pd_degree step_pd_degree tidy.step_pd_degree

#' @title Separate persistent pairs by homological degree
#'
#' @description The function `step_pd_degree()` creates a _specification_ of a
#'   recipe step that will separate data sets of persistent pairs by homological
#'   degree. The input and output must be list-columns.
#'
#' @details Additional details...
#'
#'   The `hom_degrees` argument sets the homological degrees for which to return
#'   new list-columns. If not `NULL` (the default), it is intersected with the
#'   degrees found in any specified columns of the training data; otherwise all
#'   found degrees are used. This parameter cannot be tuned.
#'

#' @import recipes
#' @importFrom dials new_quant_param unknown
#' @inheritParams recipes::step_pca
#' @inherit recipes::step_pca return
#' @param hom_degrees Integer vector of homological degrees.
#' @family topological feature extraction via persistent homology
#' @example inst/examples/ex-step-pd-degree.R

#' @export
step_pd_degree <- function(
    recipe,
    ...,
    # standard inputs
    role = "persistence diagram",
    trained = FALSE,
    # custom parameters
    hom_degrees = NULL,
    # standard parameters
    columns = NULL,
    keep_original_cols = FALSE,
    skip = FALSE,
    id = rand_id("pd_degree")
) {
  recipes_pkg_check(required_pkgs.step_pd_degree())
  
  # output the step
  add_step(
    recipe,
    step_pd_degree_new(
      terms = rlang::enquos(...),
      trained = trained,
      role = role,
      hom_degrees = hom_degrees,
      columns = columns,
      keep_original_cols = keep_original_cols,
      skip = skip,
      id = id
    )
  )
}

step_pd_degree_new <- function(
    terms,
    role, trained,
    hom_degrees,
    columns, keep_original_cols,
    skip, id
) {
  step(
    subclass = "pd_degree",
    terms = terms,
    role = role,
    trained = trained,
    hom_degrees = hom_degrees,
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )
}

#' @export
prep.step_pd_degree <- function(x, training, info = NULL, ...) {
  # save(x, training, info, file = here::here("step-pd-degree-prep.rda"))
  # load(here::here("step-pd-degree-prep.rda"))
  
  col_names <- recipes_eval_select(x$terms, training, info)
  check_pd_list(training[, col_names, drop = FALSE])
  for (col_name in col_names) class(training[[col_name]]) <- "list"
  
  # intersection of `hom_degrees` (if passed) and dimensions in data
  x_hom_degrees <- get_hom_degrees(training[, col_names, drop = FALSE])
  if (is.null(x$hom_degrees))
    x$hom_degrees <- x_hom_degrees
  else
    x$hom_degrees <- sort(intersect(as.integer(x$hom_degrees), x_hom_degrees))
  
  # output prepped step
  step_pd_degree_new(
    terms = col_names,
    role = x$role,
    trained = TRUE,
    hom_degrees = x$hom_degrees,
    columns = col_names,
    keep_original_cols = get_keep_original_cols(x),
    skip = x$skip,
    id = x$id
  )
}

#' @export
bake.step_pd_degree <- function(object, new_data, ...) {
  # save(object, new_data, file = here::here("step-pd-degree-bake.rda"))
  # load(here::here("step-pd-degree-bake.rda"))
  
  col_names <- names(object$columns)
  check_new_data(col_names, object, new_data)
  for (col_name in col_names) class(new_data[[col_name]]) <- "list"
  
  # iterate all columns over the same degrees
  pd_data <- tibble::tibble(.rows = nrow(new_data))
  for (term in object$terms) for (deg in object$hom_degrees) {
    
    # NB: This works for the 'PHom' class but may not for other formats.
    term_deg_pd <- lapply(new_data[[term]], function(d) d[d[, 1L] == deg, ])
    
    pd_data[[paste(term, deg, sep = "_")]] <- term_deg_pd
  }
  
  check_name(pd_data, new_data, object)
  new_data <- vctrs::vec_cbind(new_data, pd_data)
  new_data <- remove_original_cols(new_data, object, col_names)
  new_data
}

#' @export
print.step_pd_degree <- function(
    x, width = max(20, options()$width - 35), ...
) {
  # save(x, width, file = here::here("step-pd-degree-print.rda"))
  # load(here::here("step-pd-degree-print.rda"))
  
  title <- paste0(
    paste0(x$hom_degrees, collapse = ", "),
    "-degree features from "
  )
  
  print_step(
    untr_obj = x$terms,
    tr_obj = NULL,
    trained = x$trained,
    title = title,
    width = width
  )
  invisible(x)
}

#' @rdname required_pkgs.tdarec
#' @export
required_pkgs.step_pd_degree <- function(x, ...) {
  c("tdarec")
}

#' @rdname step_pd_degree
#' @usage NULL
#' @export
tidy.step_pd_degree <- function(x, ...) {
  if (is_trained(x)) {
    res <- tibble::tibble(
      terms = unname(x$columns),
      value = rep(NA_real_, length(x$columns))
    )
  } else {
    term_names <- sel2char(x$terms)
    res <- tibble::tibble(
      terms = term_names,
      value = rep(NA_real_, length(term_names))
    )
  }
  res$id <- x$id
  res
}

Try the tdarec package in your browser

Any scripts or data that you put into this service are public.

tdarec documentation built on June 8, 2025, 10:41 a.m.