R/FilterVariance.R

#' @title Variance Filter
#'
#' @name mlr_filters_variance
#'
#' @description Variance filter calling `stats::var()`.
#'
#' Argument `na.rm` defaults to `TRUE` here.
#'
#' @references
#' For a benchmark of filter methods:
#'
#' `r format_bib("bommert_2020")`
#'
#' @family Filter
#' @importFrom stats var
#' @template seealso_filter
#' @export
#' @examples
#' task = mlr3::tsk("mtcars")
#' filter = flt("variance")
#' filter$calculate(task)
#' head(filter$scores, 3)
#' as.data.table(filter)
#'
#' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
#'   library("mlr3pipelines")
#'   task = mlr3::tsk("spam")
#'
#'   # Note: `filter.frac` is selected randomly and should be tuned.
#'
#'   graph = po("filter", filter = flt("variance"), filter.frac = 0.5) %>>%
#'     po("learner", mlr3::lrn("classif.rpart"))
#'
#'   graph$train(task)
#' }
FilterVariance = R6Class("FilterVariance",
  inherit = Filter,

  public = list(

    #' @description Create a FilterVariance object.
    initialize = function() {
      param_set = ps(
        na.rm = p_lgl(default = TRUE)
      )
      self$param_set$values = list(na.rm = TRUE)

      super$initialize(
        id = "variance",
        task_types = NA_character_,
        param_set = param_set,
        packages = "stats",
        feature_types = c("integer", "numeric"),
        label = "Variance",
        man = "mlr3filters::mlr_filters_variance"
      )
    }
  ),

  private = list(
    .calculate = function(task, nfeat) {
      na_rm = self$param_set$values$na.rm %??% TRUE
      map_dbl(task$data(cols = task$feature_names), var, na.rm = na_rm)
    },

    .get_properties = function() {
      if (isTRUE(self$param_set$values$na.rm)) "missings" else character()
    }
  )
)

#' @include mlr_filters.R
mlr_filters$add("variance", FilterVariance)

Try the mlr3filters package in your browser

Any scripts or data that you put into this service are public.

mlr3filters documentation built on Feb. 16, 2023, 7:29 p.m.