R/util.R

Defines functions feature_relevance categorical_to_int

Documented in categorical_to_int feature_relevance

#' Helper function to transform a categorical dataset
#' into one represented by integers
#'
#' Every observation is first tagged with its column name
#' (`"<column>==<value>"`) so that equal values appearing in different
#' columns are treated as distinct feature values. Each tagged value is then
#' replaced by its position within the vector of unique feature values (V).
#'
#' @param data a tibble of categorical data
#' @return an unnamed list containing, in this order:
#' \itemize{
#'    \item converted tibble with character values represented as integers
#'      (each integer is an index into V)
#'    \item character vector of all unique feature values (V)
#' }
categorical_to_int <- function(data) {
  # Tag every observation with its column name so the values of all columns
  # can be pooled into a single vector of feature values, V.
  # The column name is recycled to the column's length so that a zero-row
  # column stays empty (paste() would otherwise yield a single "name==").
  tagged <- Map(
    function(name, column) {
      paste(rep_len(name, length(column)), column, sep = "==")
    },
    names(data),
    data
  )

  # Calculate V, the set of unique feature values within the dataset,
  # in order of first appearance (column by column)
  all_fact <- unique(unlist(tagged, use.names = FALSE))

  # Represent every observation by its position in V. This is equivalent to
  # as.integer(factor(x, levels = V)) without the intermediate factor.
  encoded <- lapply(tagged, match, table = all_fact)

  list(
    dplyr::as_tibble(encoded),
    all_fact
  )
}

#' @title Display the computed feature relevance in the CBRW model
#' @description Extracts the `feature_rel` attribute attached to `data`.
#'   Returns `NULL` when `data` carries no attribute with exactly that name.
#' @param data A tibble as provided by the output of a cbrw call
#' @return a tibble containing each feature with its computed relevance
#' @export
feature_relevance <- function(data) {
  # exact = TRUE: attr() partially matches attribute names by default, which
  # could silently return an unrelated attribute such as "feature_rel_x".
  attr(data, "feature_rel", exact = TRUE)
}
beansrowning/cbRw documentation built on Oct. 2, 2020, 12:08 a.m.