R/density_absz.R

Defines functions compute_abszdensity stat_abszdensity absz.density

Documented in absz.density stat_abszdensity

#' Density estimates for absolute z-statistics assuming that z-statistics are symmetrically distributed around 0
#'
#' Avoids downward bias at the left hand side where abs(z)=0.
#'
#' @param z vector of z-statistics (or absolute z-statistics)
#' @param at vector of points where density shall be evaluated. If NULL return a function (by calling \code{approxfun}) that allows evaluate the density at arbitrary points.
#' @param bw,adjust,kernel,n,weights,... arguments passed to \code{stats::density}
absz.density <- function(z,at=NULL, bw = 0.1, adjust = 1, kernel = "epanechnikov", n = 1024, weights=NULL,...) {
  restore.point("absz.density")
  z = abs(z)
  to = max(z)

  z.vec = c(-abs(z[z>0]),z[z==0],abs(z[z>0]))
  if (!is.null(weights)) {
    weights = c(weights[z>0], weights[z==0], weights[z>0])
    weights = weights / sum(weights)
  }

  z.dens <- stats::density(z.vec, bw = bw, adjust = adjust,kernel = kernel,from=-to, to=to, n = n,weights=weights,...)

  rows = which(z.dens$x>=0)
  dens.adjust = length(z.dens$x)/length(rows)
  if (is.null(at)) {
    approxfun(x=z.dens$x[rows],y=z.dens$y[rows] *dens.adjust,yleft = z.dens$y[rows[1]]*dens.adjust,yright = 0)
  } else {
    approx(x=z.dens$x[rows],y=z.dens$y[rows] *dens.adjust,xout=at,yleft = z.dens$y[rows[1]]*dens.adjust,yright = 0)$y
  }
}



#' ggplot2 density lines for absolute z-statistics assuming that they are symmetrically distributed around 0
#'
#' Unlike normal [geom_density] or [stat_density] the density estimate does
#' not go artificially decrease at the left bound 0. Note that this function
#' only works nicely if the data starts left with 0. Possibly atoms at z=0
#' should ideally be removed.
#'
#' @param bw The smoothing bandwidth to be used.
#'   If numeric, the standard deviation of the smoothing kernel.
#'   If character, a rule to choose the bandwidth, as listed in
#'   [stats::bw.nrd()].
#' @param adjust A multiplicate bandwidth adjustment. This makes it possible
#'    to adjust the bandwidth while still using the a bandwidth estimator.
#'    For example, `adjust = 1/2` means use half of the default bandwidth.
#' @param kernel Kernel. See list of available kernels in [density()].
#' @param n number of equally spaced points at which the density is to be
#'   estimated, should be a power of two, see [density()] for
#'   details
#' @param trim If `FALSE`, the default, each density is computed on the
#'   full range of the data. If `TRUE`, each density is computed over the
#'   range of that group: this typically means the estimated x values will
#'   not line-up, and hence you won't be able to stack density values.
#'   This parameter only matters if you are displaying multiple densities in
#'   one plot or if you are manually adjusting the scale limits.
#' @section Computed variables:
#' \describe{
#'   \item{density}{density estimate}
#'   \item{count}{density * number of points - useful for stacked density
#'      plots}
#'   \item{scaled}{density estimate, scaled to maximum of 1}
#'   \item{ndensity}{alias for `scaled`, to mirror the syntax of
#'    [`stat_bin()`]}
#' }
#' @export
stat_abszdensity <- function(mapping = NULL, data = NULL,
                         geom = "line", position = "stack",
                         ...,
                         bw = "nrd0",
                         adjust = 1,
                         kernel = "epanechnikov",
                         n = 512,
                         trim = FALSE,
                         na.rm = FALSE,
                         orientation = NA,
                         show.legend = NA,
                         inherit.aes = TRUE) {

  layer(
    data = data,
    mapping = mapping,
    stat = StatAbsZDensity,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      bw = bw,
      adjust = adjust,
      kernel = kernel,
      n = n,
      trim = trim,
      na.rm = na.rm,
      orientation = orientation,
      ...
    )
  )
}

StatAbsZDensity <- ggproto("StatAbsZDensity", Stat,
  required_aes = "x|y",

  default_aes = aes(x = after_stat(density), y = after_stat(density), fill = NA, weight = NULL),

  setup_params = function(data, params) {
    params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE)

    has_x <- !(is.null(data$x) && is.null(params$x))
    has_y <- !(is.null(data$y) && is.null(params$y))
    if (!has_x && !has_y) {
      abort("stat_density() requires an x or y aesthetic.")
    }

    params
  },

  extra_params = c("na.rm", "orientation"),

  compute_group = function(data, scales, bw = "nrd0", adjust = 1, kernel = "gaussian",
                           n = 512, trim = FALSE, na.rm = FALSE, flipped_aes = FALSE) {
    restore.point("compute_group_symdensity")

    data <- flip_data(data, flipped_aes)
    if (trim) {
      range <- range(data$x, na.rm = TRUE)
    } else {
      range <- scales[[flipped_names(flipped_aes)$x]]$dimension()
    }

    density <- compute_abszdensity(data$x, data$weight, from = 0,
      to = range[2], bw = bw, adjust = adjust, kernel = kernel, n = n)
    density$flipped_aes <- flipped_aes
    flip_data(density, flipped_aes)
  }

)

compute_abszdensity <- function(x, weights=NULL, from, to, bw = "nrd0", adjust = 1, kernel = "gaussian", n = 512) {
  nx <- length(x)

  # if less than 2 points return data frame of NAs and a warning
  if (nx < 2) {
    warn("Groups with fewer than two data points have been dropped.")
    return(new_data_frame(list(
      x = NA_real_,
      density = NA_real_,
      scaled = NA_real_,
      ndensity = NA_real_,
      count = NA_real_,
      n = NA_integer_
    ), n = 1))
  }

  from = 0
  x = abs(x)
  x.vec = c(-abs(x[x>0]),x[x==0],abs(x[x>0]))
  if (!is.null(weights)) {
    weights = c(weights[x>0], weights[x==0], weights[x>0])
    weights = weights / sum(weights)
  }


  dens <- stats::density(x.vec, weights = weights, bw = bw, adjust = adjust, from=-to, to=to,kernel = kernel, n = 2*n)

  rows = which(dens$x>=0)

  ggplot2:::new_data_frame(list(
    x = dens$x[rows],
    density = dens$y[rows] * length(dens$x)/length(rows),
    scaled =  dens$y[rows] / max(dens$y[rows], na.rm = TRUE),
    ndensity = dens$y[rows] / max(dens$y[rows], na.rm = TRUE),
    count =   dens$y[rows] * nx,
    n = nx
  ), n = length(dens$x[rows]))
}
skranz/RoundingMatters documentation built on Dec. 23, 2021, 3:23 a.m.