R/zzz-step-vpd-descriptive-statistics.R

Defines functions tunable.step_vpd_descriptive_statistics tidy.step_vpd_descriptive_statistics required_pkgs.step_vpd_descriptive_statistics print.step_vpd_descriptive_statistics bake.step_vpd_descriptive_statistics prep.step_vpd_descriptive_statistics step_vpd_descriptive_statistics_new step_vpd_descriptive_statistics

Documented in required_pkgs.step_vpd_descriptive_statistics step_vpd_descriptive_statistics tidy.step_vpd_descriptive_statistics tunable.step_vpd_descriptive_statistics

# ------------------------------------------------------------------------------
# Generated by 'pre-generate/generate-steps.R': do not edit by hand.
# ------------------------------------------------------------------------------

#' @title Descriptive Statistics Vectorization of Persistent Homology
#' 
#' @description The function `step_vpd_descriptive_statistics()` creates
#'   a _specification_ of a recipe step that will convert
#'   a list-column of 3-column matrices of persistence data
#'   to a list-column of 1-row matrices of vectorizations.
#' 

#' 
#' @template step-vpd-details
#' 
#' @section Engine:
#' 
#' The descriptive statistics vectorization deploys
#' [TDAvec::computeStats()].
#' See there for definitions and references.
#' 
#' @section Tuning Parameters:
#' 
#' This step has 1 tuning parameter:
#' \itemize{
#'   \item `hom_degree`: Homological degree (type: integer, default: `0L`)
#' }
#' 
#' @param hom_degree
#'   The homological degree of the features to be transformed.

#' @import recipes
#' @inheritParams recipes::step_pca
#' @inherit recipes::step_pca return
#' @example inst/examples/zzz-ex-step-vpd-descriptive-statistics.R

#' @export
step_vpd_descriptive_statistics <- function(
    recipe,
    ...,
    role = "predictor",
    trained = FALSE,
    hom_degree = 0L,
    columns = NULL,
    keep_original_cols = TRUE,
    skip = FALSE,
    id = rand_id("vpd_descriptive_statistics")
) {
  # User-facing entry point: builds an untrained step object from the
  # arguments and appends it to `recipe`.

  # Run the recipes package check on the packages this step declares.
  recipes_pkg_check(required_pkgs.step_vpd_descriptive_statistics())

  # The selectors in `...` are captured as quosures here; they are only
  # resolved to column names later, in the prep method.
  new_step <- step_vpd_descriptive_statistics_new(
    terms = rlang::enquos(...),
    trained = trained,
    role = role,
    hom_degree = hom_degree,
    columns = columns,
    keep_original_cols = keep_original_cols,
    skip = skip,
    id = id
  )

  add_step(recipe, new_step)
}

step_vpd_descriptive_statistics_new <- function(
    terms,
    role,
    trained,
    hom_degree,
    columns,
    keep_original_cols,
    skip,
    id
) {
  # Low-level constructor: forwards every field, unchanged, to `step()`
  # under the subclass name "vpd_descriptive_statistics". It is called both
  # when the step is first specified and again when it is prepped.
  # The order of the named fields is kept fixed on purpose.
  step(
    subclass = "vpd_descriptive_statistics",
    terms = terms, role = role, trained = trained,
    hom_degree = hom_degree,
    columns = columns, keep_original_cols = keep_original_cols,
    skip = skip, id = id
  )
}

#' @export
prep.step_vpd_descriptive_statistics <- function(x, training, info = NULL, ...) {
  # Prep method: resolves the step's selectors against `training`, validates
  # the selected columns, and returns a trained copy of the step.
  #
  # x        The untrained step object.
  # training The training data the selectors are resolved against.
  # info     Passed through to `recipes_eval_select()`.
  #
  # Returns a step object with `trained = TRUE` and the resolved column
  # names stored in both `terms` and `columns`.

  col_names <- recipes_eval_select(x$terms, training, info)

  # `check_pd_list()` is defined elsewhere; presumably it errors when a
  # selected column is not a list of persistence data -- confirm there.
  check_pd_list(training[, col_names, drop = FALSE])

  # Nothing is estimated from the training data for this step. An earlier
  # loop reset the class of each selected column of `training`, but that
  # only changed this function's local copy, which was never read again,
  # so it has been dropped.
  step_vpd_descriptive_statistics_new(
    terms = col_names,
    role = x$role,
    trained = TRUE,
    hom_degree = x$hom_degree,
    columns = col_names,
    keep_original_cols = get_keep_original_cols(x),
    skip = x$skip,
    id = x$id
  )
}

#' @export
bake.step_vpd_descriptive_statistics <- function(object, new_data, ...) {
  # Bake method: for every selected list-column, vectorizes each element
  # with `TDAvec::computeStats()` and binds the results to `new_data` as
  # ordinary columns named `<column>_s_<suffix>`.
  #
  # object   The trained step; `columns` and `hom_degree` are read from it.
  # new_data The data to transform.
  #
  # Returns `new_data` with the vectorization columns appended; the
  # original columns are then handled by `remove_original_cols()`.

  col_names <- names(object$columns)
  check_new_data(col_names, object, new_data)

  # Empty selection: there is nothing to vectorize. Without this guard,
  # `paste(col_names, "s", sep = "_")` would evaluate to "_s" (zero-length
  # arguments are treated as ""), and `tidyr::unnest()` would then fail on
  # a column that does not exist.
  if (length(col_names) == 0L) {
    return(new_data)
  }

  # Reset each selected column's class to "list". This is applied to
  # `new_data` itself, so it also affects the original columns if kept.
  for (col_name in col_names) class(new_data[[col_name]]) <- "list"

  out_names <- paste(col_names, "s", sep = "_")

  # Turn one list element into a 1-row data frame of its statistics.
  vectorize_one <- function(d) {
    v <- TDAvec::computeStats(
      as.matrix(d),
      homDim = object$hom_degree
    )
    # `vpd_suffix()` is defined elsewhere; presumably it derives the
    # per-entry name suffixes from the raw result -- confirm there. It must
    # see `v` before `as.vector()` strips its attributes.
    vn <- vpd_suffix(v)
    v <- as.vector(v)
    names(v) <- vn
    as.data.frame(matrix(
      v, nrow = 1L, dimnames = list(NULL, names(v))
    ))
  }

  # One list-column of 1-row data frames per selected input column.
  vph_data <- tibble::tibble(.rows = nrow(new_data))
  for (i in seq_along(col_names)) {
    vph_data[[out_names[[i]]]] <- purrr::map(
      new_data[[col_names[[i]]]],
      vectorize_one
    )
  }

  # Spread the 1-row data frames into regular columns.
  vph_data <- tidyr::unnest(
    vph_data,
    cols = tidyr::all_of(out_names),
    names_sep = "_"
  )

  check_name(vph_data, new_data, object)
  new_data <- vctrs::vec_cbind(new_data, vph_data)
  new_data <- remove_original_cols(new_data, object, col_names)
  new_data
}

#' @export
print.step_vpd_descriptive_statistics <- function(
    x, width = max(20, options()$width - 35), ...
) {
  # Print method: delegates to `print_step()` and returns `x` invisibly.
  #
  # x     The step object.
  # width Width passed on to `print_step()`.

  title <- "descriptive statistics of "

  # Pass the resolved columns as the trained object. Previously `NULL` was
  # passed, which left a trained step with no columns to display.
  # `x$columns` is still `NULL` before prepping, when `print_step()` is
  # given the untrained selectors in `x$terms` as well.
  print_step(
    tr_obj = x$columns,
    untr_obj = x$terms,
    trained = x$trained,
    title = title,
    width = width
  )
  invisible(x)
}

#' @rdname required_pkgs.tdarec
#' @export
required_pkgs.step_vpd_descriptive_statistics <- function(x, ...) {
  # Packages declared as required by this step. TDAvec is called in the
  # bake method. `x` is not used, so the function can be called without it.
  pkgs <- c("TDAvec", "tdarec")
  pkgs
}

#' @rdname step_vpd_descriptive_statistics
#' @usage NULL
#' @export
tidy.step_vpd_descriptive_statistics <- function(x, ...) {
  # Tidy method: one row per term, with a placeholder `value` column of
  # `NA_real_` and the step's `id`.

  # A trained step reports its resolved columns; an untrained one reports
  # its selectors converted to character.
  term_names <- if (is_trained(x)) {
    unname(x$columns)
  } else {
    sel2char(x$terms)
  }

  res <- tibble::tibble(
    terms = term_names,
    value = rep(NA_real_, length(term_names))
  )
  res$id <- x$id
  res
}

#' @rdname tunable_tdavec
#' @export
tunable.step_vpd_descriptive_statistics <- function(x, ...) {
  # Tunable method: declares the single tuning parameter, `hom_degree`.

  # The parameter object is named as `hom_degree` in tdarec; the range
  # starts at 0 and its upper end is left as `unknown()`.
  hom_degree_info <- list(
    pkg = "tdarec",
    fun = "hom_degree",
    range = c(0L, unknown())
  )

  tibble::tibble(
    name = "hom_degree",
    call_info = list(hom_degree_info),
    source = "recipe",
    component = "step_vpd_descriptive_statistics",
    component_id = x$id
  )
}

Try the tdarec package in your browser

Any scripts or data that you put into this service are public.

tdarec documentation built on June 8, 2025, 10:41 a.m.