208 Data Analysis

Documented in summaryStats summaryStats.default summaryStats.formula summaryStats.matrix

#' Summary Statistics
#' 
#' Produces a table of summary statistics for the data. If the argument
#' \code{group} is missing, calculates a matrix of summary statistics for the
#' data in \code{x}. If \code{group} is present, the elements of \code{group}
#' are interpreted as group labels and the summary statistics are displayed for
#' each group separately.
#' 
#' @param x either a single vector of values, or a formula of the form
#' data~group, or a matrix.
#' @param data an optional data frame containing the variables in the model.
#' @param group a vector of group labels.
#' @param data.order if \code{TRUE}, the group order is the order which the groups are
#' first encountered in the vector 'group'. If \code{FALSE}, the order is alphabetical.
#' @param digits the number of decimal places to display.
#' @param \dots Optional arguments which are passed to the summary statistic functions. 
#' For example \code{na.rm = TRUE} will help if there are missing values in the 
#' (response) variable.
#' @return If \code{x} is a single variable, i.e. there are no groups, then a
#' single list is invisibly returned with the following named items:
#' \item{min}{Minimum value.} 
#' \item{max}{Maximum value.} 
#' \item{mean}{Mean value.}
#' \item{var}{Variance -- the average of the squares of the deviations
#' of the data values from the sample mean.} 
#' \item{sd}{Standard deviation -- the square root of the variance.} 
#' \item{n}{Number of data values -- size of the data set.}
#' \item{nMissing}{If there are missing values, and \code{na.rm} has been set to \code{TRUE}
#' then this item will contain the number of missing values.}
#' \item{iqr}{Midspread (IQR) -- the range spanned by central half of data; the 
#' interquartile range.} 
#' \item{skewness}{Skewness statistic -- indicates how skewed the data set is. 
#' Positive values indicate right-skew data. Negative values indicate left-skew data.} 
#' \item{lq}{Lower quartile}
#' \item{median}{Median -- the middle value when the batch is ordered.}
#' \item{uq}{Upper quartile} If grouping is provided, either by using the
#' \code{group} argument, or providing a factor in a formula, or by passing a
#' matrix where the different columns represent the groups, then the function
#' will return a \code{data.frame} a row containing all the statistics above
#' for each group.
#' @keywords multivariate
#' @examples
#' 
#' ## STATS20x data:
#' data(course.df)
#' 
#' ## Single variable summary
#' with(course.df, summaryStats(Exam))
#' 
#' ## Using a formula
#' summaryStats(Exam ~ Stage1, course.df)
#' 
#' ## Using a matrix
#' X = cbind(rnorm(50), rnorm(50))
#' summaryStats(X)
#' 
#' ## Saving and extracting the information
#' sumStats = summaryStats(Exam ~ Degree, course.df)
#' sumStats
#' 
#' ## Just the BAs
#' sumStats['BA', ]
#' 
#' ## Just the means
#' sumStats$mean
#' 
#' @importFrom methods hasArg
#' 
#' @export summaryStats
summaryStats = function(x, ...) {
    UseMethod("summaryStats")
}

#' @export
#' @describeIn summaryStats Summary Statistics
summaryStats.default = function(x, group = rep("Data", length(x)), data.order = TRUE, digits = 2, ...) {
    if (!is.factor(group)) {
        group = factor(group, levels = if (data.order) {
            unique(group)
        } else {
            sort(unique(group))
        })
        
    }
    
    getStats = function(x, ...) {
        quantiles = quantile(x, probs = c(0.25, 0.5, 0.75), ...)
        names(quantiles) = NULL  ## strip off the percentages
        
        results = c(min = min(x, ...), 
                    max = max(x, ...), 
                    mean = mean(x, ...), 
                    var = var(x, ...), 
                    sd = sd(x, ...), 
                    n = length(x), 
                    iqr = IQR(x, ...), 
                    skewness = skewness(x, ...), 
                    lq = quantiles[1], 
                    median = quantiles[2], 
                    uq = quantiles[3])
        
        if(hasArg("na.rm")){
            args = list(...)
            if(args$na.rm == TRUE){
                results["nMissing"] = sum(is.na(x))
            }
        }
        
        return(results)
    }
    
    stats = NULL
    
    if (length(unique(group)) == 1) {
        stats = as.list(getStats(x, ...))
        
        with(stats, {
            cat(paste("Minimum value:          ", round(min, digits), "\n"))
            cat(paste("Maximum value:          ", round(max, digits), "\n"))
            cat(paste("Mean value:             ", round(mean, digits), "\n"))
            cat(paste("Median:                 ", round(median, digits), "\n"))
            cat(paste("Upper quartile:         ", round(uq, digits), "\n"))
            cat(paste("Lower quartile:         ", round(lq, digits), "\n"))
            cat(paste("Variance:               ", round(var, digits), "\n"))
            cat(paste("Standard deviation:     ", round(sd, digits), "\n"))
            cat(paste("Midspread (IQR):        ", round(iqr, digits), "\n"))
            cat(paste("Skewness:               ", round(skewness, digits), "\n"))
            cat(paste("Number of data values:  ", n, "\n"))
        })
        
        if("nMissing" %in% names(stats)){
            cat(paste("Number of missing values:  ", stats$nMissing, "\n"))
        }
        
    } else {
        temp.df = data.frame(x = x, group = group)
        na.action = if(hasArg("na.action")){
            args = list(...)
            args$na.action
        }else{
            na.pass
        }
        stats = aggregate(x ~ group, data = temp.df, FUN = getStats, na.action = na.action, ... = ...)
        
        ##resTable = aggregate(x ~ group, data = temp.df, FUN = getStats)
        resTable = stats
        rownames(resTable$x) = resTable$group
        
        if("nMissing" %in% colnames(stats$x)){
            resTable = as.data.frame(resTable$x[,c("n", "nMissing", "mean", "median", "sd", "iqr")])
            resTable$n = resTable$n - resTable$nMissing
            names(resTable) = c("Sample Size", "No. Miss.", "Mean", "Median", "Std Dev", "Midspread")
        }else{
            resTable = as.data.frame(resTable$x[,c("n", "mean", "median", "sd", "iqr")])
            colnames(resTable) = c("Sample Size", "Mean", "Median", "Std Dev", "Midspread")
        }
        
        print(resTable)
        
        rownames(stats$x) = stats$group
        stats = as.data.frame(stats$x)
        
    }
    
    invisible(stats)
}

#' @export
#' @describeIn summaryStats Summary Statistics
summaryStats.formula = function(x, data = NULL, data.order = TRUE, digits = 2, ...) {
    if (missing(x) || (length(x) != 3)) {
        stop("missing or incorrect formula")
    }
    
    if (is.null(data)) {
        vars = eval(attr(terms(x), "variables"), parent.frame())
    } else {
        vars = eval(attr(terms(x), "variables"), data)
    }
    
    summaryStats(vars[[1]], vars[[2]], data.order = data.order, digits = digits, ...)
}

#' @export
#' @describeIn summaryStats Summary Statistics
summaryStats.matrix = function(x, data.order = TRUE, digits = 2, ...) {
    nrows = nrow(x)
    ncols = ncol(x)
    
    x = as.vector(x)
    group = factor(rep(1:ncols, rep(nrows, ncols)))
    
    summaryStats(x, group, data.order = data.order, digits = digits, ...)
}

Any scripts or data that you put into this service are public.

s20x documentation built on Jan. 14, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

s20x
Functions for University of Auckland Course STATS 201/208 Data Analysis

R/summaryStats.R
In s20x: Functions for University of Auckland Course STATS 201/208 Data Analysis

Defines functions summaryStats.matrix summaryStats.formula summaryStats.default summaryStats

Documented in summaryStats summaryStats.default summaryStats.formula summaryStats.matrix

Try the s20x package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

s20x Functions for University of Auckland Course STATS 201/208 Data Analysis

R/summaryStats.R In s20x: Functions for University of Auckland Course STATS 201/208 Data Analysis

Defines functions summaryStats.matrix summaryStats.formula summaryStats.default summaryStats

Documented in summaryStats summaryStats.default summaryStats.formula summaryStats.matrix

Try the s20x package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

s20x
Functions for University of Auckland Course STATS 201/208 Data Analysis

R/summaryStats.R
In s20x: Functions for University of Auckland Course STATS 201/208 Data Analysis