R/bin.R

Defines functions binAverage binMean2D binCount2D binMean1D binCount1D binApply2D binApply1D

Documented in binApply1D binApply2D binAverage binCount1D binCount2D binMean1D binMean2D

# vim:textwidth=80:expandtab:shiftwidth=4:softtabstop=4

#' Apply a function to vector data
#'
#' The function `FUN` is applied to `f` in bins specified by
#' `xbreaks`.  The division of data into bins is done with [cut()].
#'
#' By default, the sub-intervals defined by the `xbreaks` argument are open
#' on the left and closed on the right, to match the behaviour
#' of [cut()].  An open interval does not include points on
#' the boundary, and so any `x` values that exactly match
#' the first `breaks` value will not be counted.  To include
#' such points in the calculation, set `include.lowest` to TRUE.
#'
#' @param x a vector of numerical values.
#'
#' @param f a vector of data to which `FUN` will be applied.
#'
#' @param xbreaks optional vector holding values of x at the boundaries between bins.
#' If this is not given, it is computed by calling [pretty()] with n=20 segments.
#'
#' @param FUN function that is applied to the `f` values
#' in each x bin.  This must take a single numeric vector
#' as input, and return a single numeric value.
#'
#' @param include.lowest logical value indicating whether to include
#' `x` values that equal `xbreaks[1]`.  See \dQuote{Details}.
#'
#' @param \dots optional arguments to pass to `FUN`.
#'
#' @return A list with the following elements: `xbreaks` as
#' used, `xmids` (the mid-points between those breaks) and
#' `result` (the result of applying `FUN` to the `f` values
#' in the designated bins).
#'
#' @examples
#' library(oce)
#' # salinity profile (black) with 1-dbar bin means (red)
#' data(ctd)
#' plotProfile(ctd, "salinity")
#' p <- ctd[["pressure"]]
#' S <- ctd[["salinity"]]
#' pbreaks <- seq(0, max(p), 1)
#' binned <- binApply1D(p, S, pbreaks, mean)
#' lines(binned$result, binned$xmids, lwd=2, col=rgb(1, 0, 0, 0.9))
#'
#' @author Dan Kelley
#'
#' @family bin-related functions
binApply1D <- function(x, f, xbreaks, FUN, include.lowest=FALSE, ...)
{
    if (missing(x))
        stop("must supply 'x'")
    if (missing(f))
        stop("must supply 'f'")
    if (missing(xbreaks))
        xbreaks <- pretty(x, 20)
    if (missing(FUN))
        stop("must supply 'FUN'")
    if (!is.function(FUN))
        stop("'FUN' must be a function")
    # 2023-06-24 # Stop using C++ for this.
    # 2023-06-24 nbreaks <- length(xbreaks)
    # 2023-06-24 if (nbreaks < 2)
    # 2023-06-24     stop("must have at least 2 breaks")
    # 2023-06-24 xmids <- xbreaks[-1L] - 0.5*diff(xbreaks)
    # 2023-06-24 result <- rep(NA_real_, nbreaks - 1L)
    # 2023-06-24 for (i in seq_len(nbreaks - 1L)) {
    # 2023-06-24     look <- if (i == 1L && include.lowest)
    # 2023-06-24         xbreaks[i] <= x & x <= xbreaks[i+1]
    # 2023-06-24     else
    # 2023-06-24         xbreaks[i] < x & x <= xbreaks[i+1]
    # 2023-06-24     if (any(look))
    # 2023-06-24         result[i] <- FUN(f[look])
    # 2023-06-24 }
    # Use cut, instead of old loopy code ... why re-invent the wheel?
    xmids <- xbreaks[-1L] - 0.5*diff(xbreaks)
    ysplit <- split(f, cut(x, xbreaks, include.lowest=include.lowest))
    result <- unname(sapply(ysplit, FUN))
    result[!is.finite(result)] <- NA
    list(xbreaks=xbreaks, xmids=xmids, result=result)
} # binApply1D()

#' Apply a function to matrix data
#'
#' The function `FUN` is applied to `f` in bins specified by
#' `xbreaks` and `ybreaks`.
#'
#' The division into bins is done with [cut()], to which `include.lowest`
#' is passed. By default, the `x` bins are open at the left and closed
#' on the right, and the `y` bins are open at the bottom and closed
#' at the top.  However, if `include.lowest` is TRUE, then those
#' boundary points are included in the calculation.
#'
#' @param x a vector of numerical values.
#'
#' @param y a vector of numerical values.
#'
#' @param f a vector of data to which `FUN` will be applied.
#'
#' @param xbreaks values of `x` at the boundaries between the
#' bins; calculated using [pretty()] if not supplied.
#'
#' @param ybreaks as `xbreaks`, but for `y`.
#'
#' @param FUN function that is applied to the `f` values
#' in each (x,y) bin.  This must take two numeric vectors
#' as input, and return a single numeric value.
#'
#' @param include.lowest logical value indicating whether to include
#' `x` values that equal `xbreaks[1]` and `y` values that equal
#' `ybreaks[1]`.  See \dQuote{Details}.
#'
#' @param \dots optional arguments to pass to `FUN`.
#'
#' @return A list with the following elements:
#' `xbreaks` and `ybreaks` as used, mid-points
#' `xmids` and `ymids`, and `result`, a matrix containing the
#' result of applying `FUN()` to the `f` values
#' in the designated bins.
#'
#' @author Dan Kelley
##
## @examples
## library(oce)
##\donttest{
## # secchi depths in lat and lon bins
## if (requireNamespace("ocedata", quietly=TRUE)) {
##     data(secchi, package="ocedata")
##     # Note that zlim is provided to the colormap(), to prevent a few
##     # points from setting a very wide scale.
##     cm <- colormap(z=secchi$depth, col=oceColorsViridis, zlim=c(0, 15))
##     par(mar=c(2, 2, 2, 2))
##     drawPalette(colormap=cm, zlab="Secchi Depth")
##     data(coastlineWorld)
##     mapPlot(coastlineWorld, longitudelim=c(-5, 20), latitudelim=c(50, 66),
##         grid=5, col="gray", projection="+proj=lcc +lat_1=50 +lat_2=65")
##     bc <- binApply2D(secchi$longitude, secchi$latitude,
##         pretty(secchi$longitude, 80), pretty(secchi$latitude, 40),
##         f=secchi$depth, FUN=mean)
##     mapImage(bc$xmids, bc$ymids, bc$result, zlim=cm$zlim, col=cm$zcol)
##     mapPolygon(coastlineWorld, col="gray")
## }
##}
#'
#' @family bin-related functions
binApply2D <- function(x, y, f, xbreaks, ybreaks, FUN, include.lowest=FALSE, ...)
{
    if (missing(x))
        stop("must supply 'x'")
    if (missing(y))
        stop("must supply 'y'")
    if (missing(f))
        stop("must supply 'f'")
    nx <- length(x)
    if (nx != length(y))
        stop("lengths of x and y must agree")
    if (missing(xbreaks))
        xbreaks <- pretty(x, 20)
    if (missing(ybreaks))
        ybreaks <- pretty(y, 20)
    if (missing(FUN))
        stop("must supply 'FUN'")
    if (!is.function(FUN))
        stop("'FUN' must be a function")
    nxbreaks <- length(xbreaks)
    if (nxbreaks < 2)
        stop("must have more than 1 xbreak")
    nybreaks <- length(ybreaks)
    if (nybreaks < 2)
        stop("must have more than 1 ybreak")
    res <- matrix(NA_real_, nrow=nxbreaks-1, ncol=nybreaks-1)
    ycut <- cut(y, ybreaks, labels=FALSE, include.lowest=include.lowest)
    F <- split(f, ycut)
    X <- split(x, ycut)
    #cat("next is F before loop\n");print(F)
    #cat("next is X before loop\n");print(X)
    for (iF in seq_along(F)) {
        xcut <- cut(X[[iF]], xbreaks, labels=FALSE, include.lowest=include.lowest)
        FF <- split(F[[iF]], xcut)
        #browser()
        ii <- as.integer(names(FF))
        jj <- as.integer(names(F)[[iF]])
        resiijj <- unlist(lapply(FF, FUN, ...))
        #message(vectorShow(dim(res)))
        #message("ii=", paste(ii, collapse=" "), ", jj=", paste(jj, collapse=" "),
        #    ", resiijj=", paste(resiijj, collapse=" "))
        res[ii, jj] <- resiijj
        #message("set res[",
        #    paste(ii, collapse=" "), ", ",
        #    paste(jj, collapse=" "), "] to ",
        #    paste(resiijj, collapse=" "))
    }
    res[!is.finite(res)] <- NA
    list(xbreaks=xbreaks, xmids=xbreaks[-1]-0.5*diff(xbreaks),
        ybreaks=ybreaks, ymids=ybreaks[-1]-0.5*diff(ybreaks),
        result=res)
} # binApply2D()


#' Bin-count vector data
#'
#' Count the number of elements of a given vector that fall within
#' successive pairs of values within a second vector.
#'
#' By default, the sub-intervals defined by the `xbreaks` argument are open
#' on the left and closed on the right, to match the behaviour
#' of [cut()].  An open interval does not include points on
#' the boundary, and so any `x` values that exactly match
#' the first `breaks` value will not be counted.  To count such
#' points, set `include.lowest` to TRUE.
#'
#' To contextualize `binCount1D()` in terms of base R functions,
#' note that
#' ```
#' binCount1D(1:20, seq(0, 20, 2))$number
#' ```
#' matches
#' ```
#' unname(table(cut(1:20, seq(0, 20, 2))))
#' ```
#'
#' @param x vector of numerical values.
#'
#' @param xbreaks Vector of values of x at the boundaries between bins, calculated using
#' [pretty()] if not supplied.
#'
#' @param include.lowest logical value indicating whether to include
#' `x` values that equal `xbreaks[1]`.  See \dQuote{Details}.
#'
#' @return A list with the following elements: the breaks (`xbreaks`,
#' midpoints (`xmids`) between those breaks, and
#' the count (`number`) of `x` values between successive breaks.
#'
#' @author Dan Kelley
#'
#' @family bin-related functions
binCount1D <- function(x, xbreaks, include.lowest=FALSE)
{
    if (missing(x))
        stop("must supply 'x'")
    if (missing(xbreaks))
        xbreaks <- pretty(x)
    nxbreaks <- length(xbreaks)
    if (nxbreaks < 2)
        stop("must have more than 1 break")
    # 2023-06-24 # stop using C++ for this.
    # 2023-06-24 res <- .C("bin_count_1d",
    # 2023-06-24     nx=length(x),
    # 2023-06-24     x=as.double(x),
    # 2023-06-24     nxbreaks=as.integer(nxbreaks),
    # 2023-06-24     xbreaks=as.double(xbreaks),
    # 2023-06-24     include_lowest=as.integer(include.lowest),
    # 2023-06-24     number=integer(nxbreaks-1L),
    # 2023-06-24     NAOK=TRUE, PACKAGE="oce")
    C <- cut(x, xbreaks, include.lowest=include.lowest)
    number <- unlist(unname(sapply(split(x, C), length)))
    list(xbreaks=xbreaks,
        xmids=xbreaks[-1L] - 0.5*diff(xbreaks),
        number=number)
} # binCount1D


#' Bin-average f=f(x)
#'
#' Average the values of a vector `f` in bins defined on another
#' vector `x`. The values are broken up into bins using [cut()].
#'
#' By default, the sub-intervals defined by the `xbreaks` argument are open
#' on the left and closed on the right, to match the behaviour
#' of [cut()].  An open interval does not include points on
#' the boundary, and so any `x` values that exactly match
#' the first `breaks` value will not be counted.  To include
#' such points in the calculation, set `include.lowest` to TRUE.
#'
#' @param x vector of numerical values that will be categorized into
#' bins via the `xbreaks` parameter.
#'
#' @param f vector of numerical values that are associated with the `x` values.
#'
#' @param xbreaks vector of values of `x` at the boundaries between bins, calculated using
#' [pretty()] if not supplied.
#'
#' @param include.lowest logical value indicating whether to include
#' `x` values that equal `xbreaks[1]`.  See \dQuote{Details}.
#'
#' @param na.rm logical value indicating whether to remove NA values before
#' doing the computation of the average. This is passed to [mean()], which
#' does the work of the present function.
#'
#' @return A list with the following elements: the breaks (`xbreaks`,
#' midpoints (`xmids`) between those breaks,
#' the count (`number`) of `x` values between successive breaks,
#' and the resultant average (`result`) of `f`, classified by the
#' `x` breaks.
#'
#' @examples
#' # Plot raw temperature profile as circles, with lines indicating
#' # the result of averaging in 1-metre depth intervals.
#' library(oce)
#' data(ctd)
#' z <- ctd[["z"]]
#' T <- ctd[["temperature"]]
#' plot(T, z, cex=0.3)
#' TT <- binMean1D(z, T, seq(-100, 0, 1))
#' lines(TT$result, TT$xmids, col=rgb(1, 0, 0, 0.9), lwd=2)
#'
#' @author Dan Kelley
#'
#' @family bin-related functions
binMean1D <- function(x, f, xbreaks, include.lowest=FALSE, na.rm=FALSE)
{
    if (missing(x))
        stop("must supply 'x'")
    fGiven <- !missing(f)
    if (!fGiven)
        f <- rep(1, length(x))
    nx <- length(x)
    if (nx != length(f))
        stop("lengths of x and f must agree")
    if (missing(xbreaks))
        xbreaks <- pretty(x)
    nxbreaks <- length(xbreaks)
    if (nxbreaks < 2)
        stop("must have more than 1 break")
    # 2023-06-24 # Stop using C++ for this.
    # 2023-06-24 res <- .C("bin_mean_1d",
    # 2023-06-24     nx=length(x),
    # 2023-06-24     x=as.double(x),
    # 2023-06-24     f=as.double(f),
    # 2023-06-24     nxbreaks=length(xbreaks),
    # 2023-06-24     xbreaks=as.double(xbreaks),
    # 2023-06-24     include_lowest=as.integer(include.lowest),
    # 2023-06-24     number=integer(nxbreaks-1),
    # 2023-06-24     result=double(nxbreaks-1),
    # 2023-06-24     NAOK=TRUE, PACKAGE="oce")
    if (na.rm) {
        ok <- is.finite(x) & is.finite(f)
        x <- x[ok]
        f <- f[ok]
    }
    C <- cut(x, xbreaks, include.lowest=include.lowest)
    S <- split(f, C)
    number <- unlist(unname(lapply(S, length)))
    result <- unlist(unname(lapply(S, mean)))
    result[!is.finite(result)] <- NA
    list(xbreaks=xbreaks,
        xmids=xbreaks[-1L] - 0.5*diff(xbreaks),
        number=number,
        result=result)
} # binMean1D

#' Bin-count matrix data
#'
#' Count the number of elements of a given matrix z=z(x,y) that fall within
#' successive pairs of breaks in x and y.
#'
#' By default, the sub-intervals defined by `xbreaks` and `ybreaks`
#' are open on the left/bottom and closed on the right/top, to match
#' the behaviour of [cut()].  An open interval does not include
#' points on the boundary, and so any `x` and `y` values that equal
#' `xbreaks[1]` or `ybreaks[1]` will not be counted.  To include
#' such points in the calculation, set `include.lowest` to TRUE.
#'
#' @param x,y vectors of numerical values.
#'
#' @param xbreaks,ybreaks vector of values of `x` and `y`
#' at the boundaries between the 2D bins, calculated using
#' [pretty()] on each of `x` and `y`, if not supplied.
#'
#' @param flatten A logical value indicating whether
#' the return value also contains equilength
#' vectors `x`, `y`, `z` and `n`, a flattened
#' representation of `xmids`, `ymids`, `result` and
#' `number`.
#'
#' @param include.lowest logical value indicating whether to include
#' points where `x` equals `xbreaks[1]` or `y` equals `ybreaks[1]`.
#'
#' @return A list with the following elements: the breaks (`xbreaks`
#' and `ybreaks`), the midpoints (`xmids` and `ymids`)
#' between those breaks, and
#' the count (`number`) of `f` values in the boxes defined
#' between successive breaks.
#'
#' @author Dan Kelley
#'
#' @family bin-related functions
binCount2D <- function(x, y, xbreaks, ybreaks, flatten=FALSE, include.lowest=FALSE)
{
    if (missing(x))
        stop("must supply 'x'")
    if (missing(y))
        stop("must supply 'y'")
    if (length(x) != length(y))
        stop("lengths of x and y must agree")
    if (missing(xbreaks))
        xbreaks <- pretty(x)
    if (missing(ybreaks))
        ybreaks <- pretty(y)
    nxbreaks <- length(xbreaks)
    if (nxbreaks < 2)
        stop("must have more than 1 xbreak")
    nybreaks <- length(ybreaks)
    if (nybreaks < 2)
        stop("must have more than 1 ybreak")
    # 2023-06-24 # Stop using C++ for this.
    # 2023-06-24 M <- .C("bin_count_2d",
    # 2023-06-24     nx=length(x),
    # 2023-06-24     x=as.double(x),
    # 2023-06-24     y=as.double(y),
    # 2023-06-24     nxbreaks=nxbreaks,
    # 2023-06-24     xbreaks=as.double(xbreaks),
    # 2023-06-24     nybreaks=nybreaks,
    # 2023-06-24     ybreaks=as.double(ybreaks),
    # 2023-06-24     include_lowest=as.integer(if (include.lowest) 1L else 0L),
    # 2023-06-24     number=integer((nxbreaks-1) * (nybreaks-1)),
    # 2023-06-24     NAOK=TRUE,
    # 2023-06-24     PACKAGE="oce")
    # 2023-06-24 res <- list(xbreaks=xbreaks,
    # 2023-06-24     ybreaks=ybreaks,
    # 2023-06-24     xmids=xbreaks[-1] - 0.5 * diff(xbreaks),
    # 2023-06-24     ymids=ybreaks[-1] - 0.5 * diff(ybreaks),
    # 2023-06-24     number=matrix(M$number, nrow=nxbreaks-1))
    res <- binApply2D(x, y, rep(1, length(x)), xbreaks, ybreaks, length, include.lowest=include.lowest)
    names(res) <- gsub("result", "number", names(res))
    dim <- dim(res$number)
    res$number <- as.integer(res$number)
    dim(res$number) <- dim
    res$number[is.na(res$number)] <- 0L
    if (flatten) {
        res2 <- list()
        res2$x <- rep(res$xmids, times=nybreaks-1)
        res2$y <- rep(res$ymids, each=nxbreaks-1)
        res2$n <- as.vector(res$number)
        res <- res2
    }
    res
} # binCount2D


#' Bin-average f=f(x,y)
#'
#' Average the values of a vector `f(x,y)` in bins defined on
#' vectors `x` and `y`. A common example might be averaging
#' spatial data into location bins.
#'
#' @param x vector of numerical values.
#'
#' @param y vector of numerical values.
#'
#' @param f Matrix of numerical values, a matrix f=f(x,y).
#'
#' @param xbreaks Vector of values of `x` at the boundaries between bins, calculated using
#' [`pretty`]`(x)` if not supplied.
#'
#' @param ybreaks Vector of values of `y` at the boundaries between bins, calculated using
#' [`pretty`]`(y)` if not supplied.
#'
#' @param flatten A logical value indicating whether
#' the return value also contains equilength
#' vectors `x`, `y`, `z` and `n`, a flattened
#' representation of `xmids`, `ymids`, `result` and
#' `number`.
#'
#' @param fill Logical value indicating whether to fill `NA`-value gaps in
#' the matrix. Gaps will be filled as the average of linear interpolations
#' across rows and columns. See `fillgap`, which works together with this.
#'
#' @param fillgap Integer controlling the size of gap that can be filled
#' across. If this is negative (as in the default), gaps will be filled
#' regardless of their size. If it is positive, then gaps exceeding this
#' number of indices will not be filled.
#'
#' @param include.lowest logical value indicating whether to include
#' `y` values for which the corresponding `x` is equal to `xmin`.
#' See \dQuote{Details}.
#'
#' @param na.rm logical value indicating whether to remove NA values before
#' doing the computation of the average. This is passed to [mean()], which
#' does the work of the present function.
#'
#' @return By default, i.e. with `flatten` being FALSE, [binMean2D()] returns a
#' list with the following elements: `xmids`, a vector holding the x-bin midpoints;
#' `ymids`, a vector holding the y-bin midpoints; `number`, a matrix holding the
#' number the points in each bin; and `result`, a matrix holding the mean
#' value in each bin. If `flatten` is TRUE, the `number` and `result`
#' matrices are renamed as `n` and `f` and transformed to vectors, while
#' the bin midpoints are renamed as `x` and `y` and extended to match the length
#' of `n` and `f`.
#'
#' @examples
#' library(oce)
#' x <- runif(500, 0, 0.5)
#' y <- runif(500, 0, 0.5)
#' f <- x^2 + y^2
#' xb <- seq(0, 0.5, 0.1)
#' yb <- seq(0, 0.5, 0.1)
#' m <- binMean2D(x, y, f, xb, yb)
#' cm <- colormap(f, col=oceColorsTurbo)
#' opar <- par(no.readonly=TRUE)
#' drawPalette(colormap=cm)
#' plot(x, y, col=cm$zcol, pch=20, cex=1.4)
#' contour(m$xmids, m$ymids, m$result, add=TRUE, labcex=1.4)
#' par(opar)
#'
#' @author Dan Kelley
#'
#' @family bin-related functions
binMean2D <- function(x, y, f, xbreaks, ybreaks, flatten=FALSE, fill=FALSE, fillgap=-1, include.lowest=FALSE, na.rm=FALSE)
{
    if (missing(x))
        stop("must supply 'x'")
    if (missing(y))
        stop("must supply 'y'")
    if (fillgap == 0)
        stop("cannot have a negative 'fillgap' value")
    fGiven <- !missing(f)
    if (!fGiven)
        f <- rep(1, length(x))
    if (length(x) != length(y))
        stop("lengths of x and y must agree, but they are ", length(x), " and ", length(y))
    if (length(x) != length(f))
        stop("lengths of x and f must agree, but they are ", length(x), " and ", length(f))
    if (missing(xbreaks))
        xbreaks <- pretty(x)
    if (missing(ybreaks))
        ybreaks <- pretty(y)
    nxbreaks <- length(xbreaks)
    if (nxbreaks < 2)
        stop("must have more than 1 xbreak")
    nybreaks <- length(ybreaks)
    if (nybreaks < 2)
        stop("must have more than 1 ybreak")
    resCount <- binCount2D(x=x, y=y, xbreaks=xbreaks, ybreaks=ybreaks, include.lowest=include.lowest)
    resMean <- binApply2D(x=x, y=y, f=f, xbreaks=xbreaks, ybreaks=ybreaks, FUN=mean, include.lowest=include.lowest, na.rm=TRUE)
    # 2023-06-25 M <- .C("bin_mean_2d", length(x), as.double(x), as.double(y), as.double(f),
    # 2023-06-25     length(xbreaks), as.double(xbreaks),
    # 2023-06-25     length(ybreaks), as.double(ybreaks),
    # 2023-06-25     as.integer(fill), as.integer(fillgap),
    # 2023-06-25     number=integer((nxbreaks-1) * (nybreaks-1)),
    # 2023-06-25     mean=double((nxbreaks-1) * (nybreaks-1)),
    # 2023-06-25     NAOK=TRUE, PACKAGE="oce")
    res <- list(xbreaks=xbreaks,
        ybreaks=ybreaks,
        xmids=xbreaks[-1] - 0.5 * diff(xbreaks),
        ymids=ybreaks[-1] - 0.5 * diff(ybreaks),
        number=resCount$number,
        result=resMean$result)
        #number=matrix(M$number, nrow=nxbreaks-1),
        #result=if (fGiven) matrix(M$mean, nrow=nxbreaks-1) else matrix(NA, ncol=nybreaks-1, nrow=nxbreaks-1))
    if (flatten) {
        res2 <- list()
        res2$x <- rep(res$xmids, times=nybreaks-1)
        res2$y <- rep(res$ymids, each=nxbreaks-1)
        res2$f <- as.vector(res$result)
        res2$n <- as.vector(res$number)
        res <- res2
    }
    res
} # binMean2D

#' Bin-average a vector y, based on x values
#'
#' [binAverage()] works by calling [binMean1D()], after computing
#' the `xbreaks` parameter of the latter function as `seq(xmin,xmax,xinc)`.
#' Note that the return value of [binAverage()] uses only the `xmids` and
#' `result` entries of the [binMean1D()] result.
#'
#' By default, the sub-intervals defined by `xmin`, `xinc` and `xmax`
#' arguments are open on the left and closed on the right, to match
#' the behaviour of [cut()].  An open interval does not include
#' points on the boundary, and so any `x` values that exactly match
#' the first `breaks` value will not be counted.  To include
#' such points in the calculation, set `include.lowest` to TRUE.
#'
#' @param x a vector of numerical values.
#'
#' @param y a vector of numerical values.
#'
#' @param xmin x value at the lower limit of first bin; the minimum `x`
#' will be used if this is not provided.
#'
#' @param xmax x value at the upper limit of last bin; the maximum `x`
#' will be used if this is not provided.
#'
#' @param xinc width of bins, in terms of x value; 1/10th of `xmax-xmin`
#' will be used if this is not provided.
#'
#' @param include.lowest logical value indicating whether to include
#' `y` values for which the corresponding `x` is equal to `xmin`.
#' See \dQuote{Details}.
#'
#' @param na.rm logical value indicating whether to remove NA values before
#' doing the computation of the average. This is passed to [mean()], which
#' does the work of the present function.
#'
#' @return A list with two elements: `x`, the mid-points of the bins, and
#' `y`, the average `y` value in the bins.
#'
#' @author Dan Kelley
#'
#' @examples
#' library(oce)
#' # A. fake linear data
#' x <- seq(0, 100, 1)
#' y <- 1 + 2 * x
#' plot(x, y, pch=1)
#' ba <- binAverage(x, y)
#' points(ba$x, ba$y, pch=3, col="red", cex=3)
#'
#' # B. fake quadratic data
#' y <- 1 + x ^2
#' plot(x, y, pch=1)
#' ba <- binAverage(x, y)
#' points(ba$x, ba$y, pch=3, col="red", cex=3)
#'
#' # C. natural data
#' data(co2)
#' plot(co2)
#' avg <- binAverage(time(co2), co2, 1950, 2000, 2)
#' points(avg$x, avg$y, col="red")
#'
#' @family bin-related functions
binAverage <- function(x, y, xmin, xmax, xinc, include.lowest=FALSE, na.rm=FALSE)
{
    if (missing(y))
        stop("must supply 'y'")
    if (missing(xmin))
        xmin <- min(as.numeric(x), na.rm=TRUE)
    if (missing(xmax))
        xmax <- max(as.numeric(x), na.rm=TRUE)
    if (missing(xinc))
        xinc  <- (xmax - xmin) / 10
    if (xmax <= xmin)
        stop("must have xmax > xmin")
    if (xinc <= 0)
        stop("must have xinc > 0")
    xbreaks <- seq(xmin, xmax, xinc)
    res <- binMean1D(x, y, xbreaks, include.lowest=include.lowest, na.rm=na.rm)
    list(x=res$xmids, y=res$result)
}

Try the oce package in your browser

Any scripts or data that you put into this service are public.

oce documentation built on July 9, 2023, 5:18 p.m.