R/set_fct.R

Defines functions is_probability set_fct.default set_fct.data.frame set_fct

Documented in is_probability set_fct set_fct.data.frame set_fct.default

#' @rdname set_fct.data.frame
#' @export
set_fct <- function(.data, ..., first_level = NULL, order_fct = FALSE,  labels = NULL, max_levels = Inf){

  UseMethod("set_fct", .data)
}

#' set factor
#'
#' allows option to manually set the first level of the factor, for consistency with
#' yardstick which automatically considers the first level
#' as the "positive class" when evaluating classification.
#'
#' @method set_fct data.frame
#' @param .data dataframe
#' @param ... tidyselect (default selection: all character columns)
#' @param first_level character string to set the first level of the factor
#' @param labels chr vector of labels, length equal to factor levels
#' @param order_fct logical. ordered factor?
#' @param max_levels integer. uses \code{\link[forcats]{fct_lump_n}} to limit the number of categories. Only the top \code{max_levels} are preserved, and the rest being lumped into "other"
#'
#' @return tibble
#' @export
#'
#' @examples
#'
#' ## simply set the first level of a factor
#'
#' iris$Species %>% levels
#'
#' iris %>%
#'   set_fct(Species, first_level = "virginica") %>%
#'   dplyr::pull(Species) %>%
#'   levels()
set_fct.data.frame <- function(.data, ..., first_level = NULL, order_fct = FALSE,   max_levels = Inf){

  .data %>%
    select_otherwise(..., otherwise = where(is.character), return_type = "names") -> nms


  if (!is.null(first_level)) {
    first_level <- as.character(first_level)
  }



  .data %>%
    dplyr::mutate(dplyr::across(tidyselect::any_of(nms),  .fns = ~set_fct(., first_level = first_level,
                                                                         order_fct = order_fct,
                                                                         max_levels = max_levels) ))
}



#' @rdname set_fct.data.frame
#' @method set_fct default
#' @export
set_fct.default <- function(.data, ..., first_level = NULL, order_fct = FALSE,  max_levels = Inf){


  .data %>%
    factor(ordered = order_fct) %>%
    forcats::fct_relevel(first_level, after = 0L) %>%
    forcats::fct_lump(n = max_levels, ties.method = "first")
}



#' is_probability
#'
#' @param x numeric vector
#' @keywords internal
#'
#' @return logical
is_probability <- function(x){
  is.double(x) && all(dplyr::between(x, 0, 1), na.rm = T)  & dplyr::n_distinct(x) > 2
}

Try the framecleaner package in your browser

Any scripts or data that you put into this service are public.

framecleaner documentation built on May 29, 2024, 1:59 a.m.