R/agg.R

Defines functions .agg

# aggregate the values of a numerical variable y 
#   (a) over the categories of categorical variable x
#   (b) optionally crossed with by and facet variables
# stat: "sum", "mean", "sd", "deviation", "min", "median", "max"
#
# single output structure, regardless of analysis type
#   list(out, ylab)
#     out  : data frame with columns x, <any by/facet columns>, y,
#            one row per (x [, group]) combination
#            a by or facet passed as a vector appears in out as the
#            column "by.var" or "facet.var"
#     ylab : "<Stat label> of <y.name>" for the default y-axis label
#
# Called from: Chart.R, XY.R

.agg <- function(x, y, by=NULL, facet=NULL, stat, y.name=NULL) {

  if (is.data.frame(x)) x <- x[, 1L]

  ylab <- paste(.stat_lbl(stat), "of", y.name)

  if (stat == "deviation") {
    if (!is.null(by) || !is.null(facet)) {
      cat("\n"); stop(call.=FALSE, "\n","------\n",
        "deviation for stat not meaningful with a by or facet variable\n\n")
    }
    out <- tapply(y, x, mean, na.rm=TRUE)
    # deviation of each group mean from the unweighted mean, that is, the
    # average of the group means (not the grand mean of all data values, so
    # each group counts equally regardless of its number of observations)
    out <- out - mean(out, na.rm=TRUE)
    out <- data.frame(x=factor(names(out)), y=as.vector(out))
  }

  else if (is.null(by) && is.null(facet)) {
    out <- tapply(y, x, .stat_fun(stat))
    out <- data.frame(x=factor(names(out)), y=as.vector(out))
  }

  else {  # build data frame: x + (columns from by) + (columns from facet) + y
    df <- data.frame(x = x)

    # by can be vector, matrix, or data frame
    if (!is.null(by)) {
      if (is.data.frame(by))
        df <- cbind(df, by)
      else if (is.matrix(by))
        df <- cbind(df, as.data.frame(by))
      else
        df$by.var <- by
    }

    # facet can be vector, matrix, or data frame
    if (!is.null(facet)) {
      if (is.data.frame(facet))
        df <- cbind(df, facet)
      else if (is.matrix(facet))
        df <- cbind(df, as.data.frame(facet))
      else
        df$facet.var <- facet
    }

    df$y <- y

    out <- stats::aggregate(y ~ ., data=df, FUN=.stat_fun(stat))

    # ensure canonical column names "x" and "y"
    if (!("x" %in% names(out)))
      stop(".agg(): aggregated result must contain a column named 'x'.")
    if (!("y" %in% names(out)))
      stop(".agg(): aggregated result must contain a column named 'y'.")
  }

  # NA/NaN, or +/-Inf from min/max of an all-missing cell, means the
  # statistic could not be computed for one or more cells
  if (any(!is.finite(out$y))) {
    cat("\n"); stop(call.=FALSE, "\n","------\n",
      "The summary table of the transformed data has some missing or\n",
      "   non-finite values, likely because some cells have too few\n",
      "   (or no) data values to compute the specified statistic\n\n")
  }

  return(list(out=out, ylab=ylab))
}

Try the lessR package in your browser

Any scripts or data that you put into this service are public.

lessR documentation built on June 21, 2026, 5:06 p.m.