R/stat_smooth_func.R

Defines functions stat_smooth_func

Documented in stat_smooth_func

#' Add function equation to statistical smoothing
#'
#' @inheritParams ggplot2::stat_smooth
#' @param xpos X-position for text
#' @param ypos Y-position for text
#' @source https://gist.github.com/kdauria/524eade46135f6348140
#' @author kdauria
#' @section Computed variables:
#' \describe{
#'   \item{y}{predicted value}
#'   \item{ymin}{lower pointwise confidence interval around the mean}
#'   \item{ymax}{upper pointwise confidence interval around the mean}
#'   \item{se}{standard error}
#' }
stat_smooth_func <- function(mapping = NULL, data = NULL,
                             geom = "smooth", position = "identity",
                             ...,
                             method = "auto",
                             formula = y ~ x,
                             se = TRUE,
                             n = 80,
                             span = 0.75,
                             fullrange = FALSE,
                             level = 0.95,
                             method.args = list(),
                             na.rm = FALSE,
                             show.legend = FALSE,
                             inherit.aes = TRUE,
                             xpos = NULL,
                             ypos = NULL) {
  layer(
    data = data,
    mapping = mapping,
    stat = StatSmoothFunc,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      method = method,
      formula = formula,
      se = se,
      n = n,
      fullrange = fullrange,
      level = level,
      na.rm = na.rm,
      method.args = method.args,
      span = span,
      xpos = xpos,
      ypos = ypos,
      ...
    )
  )
}


StatSmoothFunc <- ggproto(
  "StatSmooth", Stat,

  setup_params = function(data, params) {
    # Figure out what type of smoothing to do: loess for small datasets,
    # gam with a cubic regression basis for large data
    # This is based on the size of the _largest_ group.
    if (identical(params$method, "auto")) {
      max_group <- max(table(data$group))

      if (max_group < 1000) {
        params$method <- "loess"
      } else {
        params$method <- "gam"
        params$formula <- y ~ s(x, bs = "cs")
      }
    }
    if (identical(params$method, "gam")) {
      params$method <- mgcv::gam
    }

    params
  },

  compute_group = function(data, scales, method = "auto", formula = y~x,
                           se = TRUE, n = 80, span = 0.75, fullrange = FALSE,
                           xseq = NULL, level = 0.95, method.args = list(),
                           na.rm = FALSE, xpos=NULL, ypos=NULL) {
    if (length(unique(data$x)) < 2) {
      # Not enough data to perform fit
      return(data.frame())
    }

    if (is.null(data$weight)) data$weight <- 1

    if (is.null(xseq)) {
      if (is.integer(data$x)) {
        if (fullrange) {
          xseq <- scales$x$dimension()
        } else {
          xseq <- sort(unique(data$x))
        }
      } else {
        if (fullrange) {
          range <- scales$x$dimension()
        } else {
          range <- range(data$x, na.rm = TRUE)
        }
        xseq <- seq(range[1], range[2], length.out = n)
      }
    }
    # Special case span because it's the most commonly used model argument
    if (identical(method, "loess")) {
      method.args$span <- span
    }

    if (is.character(method)) method <- match.fun(method)

    base.args <- list(quote(formula), data = quote(data), weights = quote(weight))
    m <- do.call(method, c(base.args, method.args))

    m.rmse <- (sqrt( sum(m$residuals^2) / (length(m$residuals) - 1)) /
      mean(m$fitted.values)) * 100

    eq <- substitute(atop(italic(y) == b %.% italic(x) + a),
                     list(
                       a = as.numeric(format(coef(m)[1], digits = 3)),
                       b = as.numeric(format(coef(m)[2], digits = 3))
                     ))
    func_string = as.character(as.expression(eq))

    if(is.null(xpos)) xpos = min(data$x)*0.9
    if(is.null(ypos)) ypos = max(data$y)*0.9
    data.frame(x=xpos, y=ypos, label=func_string)

  },

  required_aes = c("x", "y")
)
pbsag/outviz documentation built on Dec. 7, 2019, 5:50 a.m.