R/0_util.r
In cheem: Interactively Explore Local Explanations with the Radial Tour

Documented in as_logical_index color_scale_of contains_nonnumeric devMessage ifDev is_discrete is_diverging linear_tform logistic_tform problem_type rnorm_from

## Utility functions -----

#' Check if a vector is discrete
#' 
#' Whether or not a vector is a discrete variable, returns a logical.
#' Typically used on the Y variable of a model.
#' 
#' @param x A vector to check the discreteness of.
#' @param na.rm Whether or not to remove NA values before testing discreteness.
#' Defaults to TRUE.
#' @return Logical, whether or not `x` is a discrete variable.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' is_discrete(mtcars$mpg) ## Numeric column, with more than 25 unique values.
#' is_discrete(mtcars$cyl) ## Numeric column, labeled as discrete, because less than 25 unique values
#' is_discrete(letters)    ## Characters and factors labeled discrete.
is_discrete <- function(x, na.rm = TRUE){
  x <- x[is.na(x) == FALSE] ## Remove NAs
  is.factor(x) || is.character(x) || is.logical(x) ||
    (length(unique(x)) < 25 & is.numeric(x))
}

#' Check if a vector diverges a value
#' 
#' Whether or not a vector is a diverges a value, returns a logical.
#' Used to help default a scale_color for ggplot2.
#' 
#' @param x A vector to check the divergence of.
#' @param mid_pt A single number checking divergence from. Defaults to 0.
#' @return Logical, whether or not `x` is a diverges `mid_pt`.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' is_diverging(-10:10)
#' is_diverging(-10:-5)
#' is_diverging(mtcars$mpg, 25)
#' is_diverging(mtcars$mpg, 40)
is_diverging <- function(x, mid_pt = 0){
  x <- x[is.na(x) == FALSE] ## Remove NAs
  max(x) > mid_pt & min(x) < mid_pt
}

#' Suggest a color and fill scale.
#' 
#' Whether or not a vector is a diverges a value, returns a logical.
#' Used to help default a scale_color for ggplot2.
#' 
#' @param x A vector to to scale the color to.
#' @param mid_pt A single number checking divergence from. Defaults to 0.
#' @param limits A vector of the min and max values for the scale. 
#' Useful for setting an absolute range, such as (-1, 1) for attribution/y 
#' correlation of each point. Points outside of limits show as default grey.
#' Defaults to NULL; the range of x.
#' @param ... Optional other arguments passed to ggplot2::continuous_scale 
#' or ggplot2::discrete_scale.
#' @return A list containing a scale_color, and scale_fill; 
#' the suggested color/fill scale for a ggplot.
#' @export
#' @aliases colour_scale_of
#' @family cheem utility
#' @examples
#' library(cheem)
#' library(ggplot2)
#' g <- ggplot(mtcars, aes(disp, mpg))
#' 
#' ## Discrete
#' g + geom_point(aes(color = factor(vs))) +
#'   color_scale_of(mtcars$vs) 
#' ## Sequential increasing
#' g + geom_point(aes(color = mpg)) +
#'   color_scale_of(mtcars$mpg)
#' ## Dummy sequential decr
#' g + geom_point(aes(color = -1 *mpg)) +
#'   color_scale_of(-1 * mtcars$mpg)
#' ## Dummy diverging
#' g + geom_point(aes(color = mpg - median(mpg))) +
#'   color_scale_of(mtcars$mpg - median(mtcars$mpg))
#' ## Dummy limits
#' g + geom_point(aes(color = mpg - median(mpg))) +
#'   color_scale_of(mtcars$mpg - median(mtcars$mpg), limits = c(-5, 5))
color_scale_of <- function(x, mid_pt = 0, limits = NULL, ...){
  x <- x[is.na(x) == FALSE] ## Remove NAs
  b <- "blue3" #scales::muted("blue")
  g <- "grey80"
  r <- "red3"  #scales::muted("red")
  if(is_discrete(x)){
    ret <- list(ggplot2::scale_color_brewer(palette = "Dark2", ...),
                ggplot2::scale_fill_brewer( palette = "Dark2", ...))
  }else if(is_diverging(x, mid_pt)){
    ## Diverging
    ret <- list(
      ggplot2::scale_color_gradient2(
        low = b, mid = g, high = r, limits = limits, ...),
      ggplot2::scale_fill_gradient2( 
        low = b, mid = g, high = r, limits = limits, ...))
  }else if(all(x >= mid_pt)){
    ## Sequential above mid_pt
    ret <-  ret <- list(
      ggplot2::scale_color_gradient(low = g, high = r, limits = limits, ...),
      ggplot2::scale_fill_gradient( low = g, high = r, limits = limits, ...))
  }else{
    ## Sequential below mid_pt
    ret <- list(
      ggplot2::scale_color_gradient(low = b, high = g, limits = limits, ...),
      ggplot2::scale_fill_gradient( low = b, high = g, limits = limits, ...))
  }
  ## Return
  ret
}

#' The type of model for a given Y variable
#' 
#' Whether the Y is a "classification", "regression" or ill-defined problem.
#' Returns a character: "classification", "regression", or an error for strange 
#' classes. Minor redundancy with is_discrete, though explicit. Could be useful
#' for `DALEX::explain(type)` as it also expects "classification" or 
#' "regression".
#' @param y Response variable to be modeled
#' @return Character either c("classification", "regression") specifying the 
#' assumed model task based on the discreteness of y.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' problem_type(mtcars$mpg) ## Numeric, many values
#' problem_type(mtcars$cyl) ## Numeric column, labeled as discrete, because less than 25 unique values
#' problem_type(letters) ## Character to classification
#' problem_type(factor(letters)) ## Factor to classification
problem_type <- function(y){
  if(all(is.null(y) | is.na(y))) return("No y provided")
  if(is_discrete(y) == TRUE) return("classification")
  if(is.numeric(y)  == TRUE) return("regression")
  stop("y was expected as a with less than 25 unique values, or continuous numeric; indicating a classification or regression problem respectivly.")
}


#' Check if a vector contains non-numeric character
#' 
#' Returns a logical, whether or not a vector contains any non-numeric 
#' characters. Typically used to test if row names hold non-index information.
#' 
#' @param x A vector to be tested for existence of non-numeric characters.
#' @return Logical, whether or not x contains any non-numeric characters.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' contains_nonnumeric(mtcars$mpg)
#' contains_nonnumeric(rownames(mtcars)) ## Meaningful info to use in tooltip
#' contains_nonnumeric(rownames(cars)) ## Assume no meaningful info to use in tooltip
contains_nonnumeric <- function(x){
  suppressWarnings(any(is.na(as.numeric(as.character(x)))))
}

#' Draw new samples from the supplied data given its mean and covariances.
#' 
#' Creates new observation of the data given its specific means and shapes.
#' typically applied to a cluster subset of data. _ie_ draw from cluster 'a', 
#' then assign to cluster 'b'.
#' 
#' @param data A data.frame or matrix to sample from.
#' @param n_obs Number of new observations to draw. Defaults to 1.
#' @param var_coeff Variance coefficient, closer to 0 make points near the 
#' median, above 1 makes more points further away from the median. 
#' Defaults to 1.
#' @param method The method of the covariance matrix. Expects "person" 
#' (continuous numeric), "kendall" or "spearman" 
#' (latter two are ranked based ordinal).
#' @return A data.frame, sampled observations given the means and covariance of 
#' the data based on with column names kept.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' sub <- mtcars[mtcars$cyl == 6, ]
#' ## Draw 3 new observations in the shape of 6 cylinder vehicles, with reduced variance.
#' rnorm_from(data = sub, n_obs = 3, var_coeff = .5)
rnorm_from <- function(
  data, n_obs = 1, var_coeff = 1, method = c("pearson", "kendall", "spearman")
){
  .mns <- apply(data, 2, stats::median)
  .cov <- stats::cov(data, method = match.arg(method))
  diag(.cov) <- var_coeff * diag(.cov) ## Decrease univariate variance if needed.
  ## person numeric, not spearman ranked/ordinal
  
  ## Check if this sigma is a positive definite matrix.
  if(lqmm::is.positive.definite(.cov) == FALSE){
    .cov <- lqmm::make.positive.definite(.cov)
  }
  
  ## Sample and return
  ret <- mvtnorm::rmvnorm(n = n_obs, mean = .mns, sigma =  var_coeff * .cov)
  as.data.frame(ret)
}

#' Linear function to help set alpha opacity
#' 
#' Suggests a alpha opacity to plot with as a function of the number of 
#' observation.
#' 
#' @param n Number of observations to plot.
#' @param appox_max_n The number of observation to reach floor opacity.
#' @param ceiling The highest number returned. Defaults to 1.
#' @param floor The lowest number returned. Defaults to 0.3.
#' @return A scalar numeric, suggested value to set alpha opacity.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' ## Suggest an opacity to use in plotting:
#' (my_alpha <- linear_tform(nrow(spinifex::penguins_na.rm)))
#'
#' ## Visualize
#' x <- 1:2000
#' plot(x, sapply(x, linear_tform), col = "blue")
linear_tform = function(
  n, appox_max_n = 5000, ceiling = 1, floor = .3
){
  vec <- 1 - min(n / appox_max_n, 1)
  ceiling * (floor + (1 - floor) * vec)
}

#' Logistic function to help set alpha opacity
#' 
#' Suggests a alpha opacity to plot with as a function of the number of 
#' observation.
#' 
#' @param n Number of observations to plot.
#' @param mid_pt Inflection point that the logistic curve. Defaults to 600.
#' @param k_attenuation The steepness of the transition, larger is a sharper
#' transition. Quite sensitive and defaults to 5.
#' @param ceiling The highest number returned. Defaults to 1.
#' @param floor The lowest number returned. Defaults to 0.3.
#' @return A scalar numeric, suggested value to set alpha opacity.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' ## Suggest an opacity to use in plotting:
#' (my_alpha <- logistic_tform(nrow(spinifex::penguins_na.rm)))
#'
#' ## Visualize
#' x <- 1:2000
#' plot(x, logistic_tform(x), col = "blue")
logistic_tform = function(
  n, mid_pt = 600, k_attenuation = 5, ceiling = 1, floor = .3
){
  vec <- 1 / (1 + exp(k_attenuation / 1000 * (n - mid_pt)))
  ceiling * (floor + (1 - floor) * vec)
}


#' Assure a full length logical index
#' 
#' Suggests a alpha opacity to plot with as a function of the number of 
#' observation.
#' 
#' @param index A vector, typically a numeric row index of the data to
#' coerce to a logical index.
#' @param n Single numeric, the number of rows of the data use as a replicate
#' return length.
#' @return A logical index of length n.
#' @export
#' @family cheem utility
#' @examples
#' library(cheem)
#' 
#' ## Coerce a numeric index to logical
#' as_logical_index(c(1, 4:10, 15), nrow(mtcars))
#' 
#' ## Logical indexs are unchanged
#' as_logical_index(mtcars$mpg > 30, nrow(mtcars))
as_logical_index <- function(index, n){
  if(is.logical(index) & length(index) != n)
    stop("as_logical_index: `index` was logical, but not of length `n`.")
  if(is.numeric(index) == TRUE){
    rep_f <- rep(FALSE, n)
    rep_f[unique(index)] <- TRUE
    index <- rep_f
  }
  if(identical(index, TRUE)) index <- rep(TRUE, n)
  index
}


#' Development message
#' 
#' Send a message if the 4th chunk of the package version is 9000.
#' @param text A character string to message() if package version is 9000.
devMessage <- function(text){
  version4 <-  utils::packageVersion(pkg = "cheem")[1, 4]
  if(is.na(version4) == FALSE)
    if(version4 == 9000)
      message(paste0("devMessage: ", text))
  
  ## Attempt to stop inaccurate warning (used in shiny app)
  if(F){
    dummy <- DT::datatable()
    dummy <- shinycssloaders::withSpinner()
    dummy <- shinythemes::shinytheme()
  }
}

#' Evaluate if development
#' 
#' Evaluate the expression if the 4th chunk of the package version is 9000.
#' @param expr A character string to message() if package version is 9000.
ifDev <- function(expr){
  version4 <- utils::packageVersion(pkg = "cheem")[1, 4]
  if(is.na(version4) == FALSE)
    if(version4 == 9000)
      eval(expr)
}

Any scripts or data that you put into this service are public.

cheem documentation built on Nov. 9, 2023, 1:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cheem
Interactively Explore Local Explanations with the Radial Tour

R/0_util.r
In cheem: Interactively Explore Local Explanations with the Radial Tour

Defines functions ifDev devMessage as_logical_index logistic_tform linear_tform rnorm_from contains_nonnumeric problem_type color_scale_of is_diverging is_discrete

Documented in as_logical_index color_scale_of contains_nonnumeric devMessage ifDev is_discrete is_diverging linear_tform logistic_tform problem_type rnorm_from

Try the cheem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cheem Interactively Explore Local Explanations with the Radial Tour

R/0_util.r In cheem: Interactively Explore Local Explanations with the Radial Tour

Defines functions ifDev devMessage as_logical_index logistic_tform linear_tform rnorm_from contains_nonnumeric problem_type color_scale_of is_diverging is_discrete

Documented in as_logical_index color_scale_of contains_nonnumeric devMessage ifDev is_discrete is_diverging linear_tform logistic_tform problem_type rnorm_from

Try the cheem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cheem
Interactively Explore Local Explanations with the Radial Tour

R/0_util.r
In cheem: Interactively Explore Local Explanations with the Radial Tour