R/plot.R

Defines functions na_scatter

Documented in na_scatter

#' Scatterplot colored by appearence of missing values
#'
#' @param data               a data.frame.cala
#' @param formula            an object of class "\code{\link[stats]{formula}}":
#'                           a symbolic description of the model to be fitted.
#'                           It must take form of \code{z ~ x + y}, where \code{z}
#'                           is a variable containing missing values and
#'                           \code{x} and \code{y} are the variables to plot.
#' @param col,pch,cex,alpha  a two-value vector of colors, plotting characters,
#'                           scaling, and transparency (value between 0 and 1) for
#'                           non-missing and missing values.
#'                           If provided as a scalar, same value is used for
#'                           both non-missing and missing values.
#' @param legend             position of legend, if \code{NULL}, legend is not
#'                           printed.
#' @param \dots              additional arguments passed to \code{\link{plot}}.
#'
#' @examples
#'
#' set.seed(123)
#'
#' dat <- mtcars
#' dat$disp[sample.int(nrow(dat), 10)] <- NA
#'
#' na_scatter(dat, disp ~ mpg + wt)
#' na_scatter(dat, disp ~ mpg + jitter(cyl, 1.1))
#'
#' @importFrom graphics plot
#'
#' @export

na_scatter <- function(data, formula, col = c("lightblue", "red"),
                       alpha = 0.3, pch = 16L, cex = 1, legend = "topright",
                       ...) {

  if ( !missing(formula) ) {
    y_var <- lhs_vars(formula)
    mf <- model.frame(formula, data = data, na.action = na.pass)
    if ( ncol(mf) != 3L )
      stop("invalid formula")
  } else {
    y_var <- colnames(data)[1L]
    mf <- data
  }

  for ( v in c("col", "alpha", "pch") ) {
    if ( length(get(v)) > 2L)
      message(sprintf("%s has length > 2 and only the first two elements will be used", v))
  }

  alpha <- rep(alpha, length.out = 2L)
  col <- add_alpha(rep(col, length.out = 2L), alpha = alpha)
  pch <- rep(pch, length.out = 2L)

  nas <- is.na(mf[, 1L])

  if ( sum(nas) == 0L )
    warning(paste(y_var, "contains no missing values."))

  plot(mf[, 2L], mf[, 3L], xlab = colnames(mf)[2L],
       ylab = colnames(mf)[3L], col = col[nas + 1L], pch = pch, ...)

  if ( !is.null(legend) ) {
    legend(legend, col = col, pch = pch, bty = "n",
           legend = c("non-missing", "missing"))
  }
}

# na_boxplot <- function(data, formula, col = c("lightblue", "red"),
#                        alpha = 0.3, pch = 16L, ...) {
#
#   if ( length(col) > 2L )
#     message("col has length > 2 and only the first two elements will be used")
#   col <- add.alpha(rep(col, length.out = 2L), alpha = alpha)
#
#   mf <- model.frame(formula, data = data, na.action = na.pass)
#   if ( ncol(mf) != 2L )
#     stop("invalid formula")
#
#   mf[, 1L] <- as.factor(ifelse(is.na(mf[, 1L]), "missing", "non-missing"))
#   frm <- as.formula(paste(colnames(mf)[2L:1L], collapse = " ~ "))
#
#   boxplot(frm, data = mf, ylab = colnames(mf)[2L], ...)
#
# }
twolodzko/misster documentation built on May 24, 2019, 2:54 p.m.