R/popmoments.R

Defines functions ccov. cvar. cE. Edist cm. int. reg. cor. skew. coskew. cov. R2. Edev2. dev2. dev. z. sd. var. E.

Documented in ccov. cE. cm. cor. coskew. cov. cvar. dev. dev2. E. Edev2. Edist int. R2. reg. sd. skew. var. z.

#' Population-level moments
#'
#' Simple functions for calculating the population-level moments.
#' 
#' The expectation (\code{\link{E.}}), variance (\code{\link{var.}}), 
#' covariance (\code{\link{cov.}}), correlation (\code{\link{cor.}}), 
#' and regression (\code{\link{reg.}}) operators are provided. Note 
#' that each function ends in a \code{.} to distinguish them from
#' other standard functions with similar names. Convenience functions
#' for other quantities are also provided (e.g. \code{\link{skew.}},
#' \code{\link{coskew.}}, \code{\link{cm.}}, \code{\link{int.}},
#' \code{\link{z.}}).  All of these functions act on vectors
#' without regard to dimension information or any other attributes
#' (including S3 class information). Two functions are special in
#' that they do not act on vectors: \code{\link{Edist}} and
#' \code{\link{cm.}}. \code{\link{Edist}} acts on \code{d-functions}
#' (e.g. \code{\link{dnorm}}), to give (or approximate) the expected
#' value of a (univariate) distribution.  \code{\link{cm.}} acts on
#' data frames to give the mixed central moment of the columns of a
#' data frame.
#'
#' @docType package
#' @name popmoments
#' @aliases popmoments package-popmoments popmoments-package
NULL

#' Population-level expectation
#'
#' Expectation of a vector giving the elements of a population.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The expectation of \code{x}.
#' @export
#' @examples
#' E.(1:10)
#' E.(1:10, 10:1)
E. <- function(x, p){
	if(missing(p)) return(mean(x))
	if(any(p < 0)) stop('p must contain positive numbers only')
	if(length(x) != length(p)) stop('p must be same length as x')
	p <- p/sum(p)
	return(sum(x*p))
}

#' Population-level variance
#'
#' Variance of a vector giving the elements of a population.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The variance of \code{x}.
#' @export
#' @examples
#' # Note the difference between population and sample variance
#' var.(1:10)
#' var(1:10)
#' # But we can make up the difference with,
#' var(1:10)*(9/10)
var. <- function(x, p) E.(x^2, p) - (E.(x, p)^2)

#' Population-level standard deviation
#'
#' Standard deviation of a vector giving the elements of a population.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The standard deviation of \code{x}.
#' @export
sd. <- function(x, p) sqrt(E.(x^2, p) - (E.(x, p)^2))

#' Population-level z-score
#'
#' Transform a vector into z-scores using population mean and standard deviation.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The z-scores of \code{x}.
#' @export
z. <- function(x, p) dev.(x, p)/sd.(x, p)

#' Population-level deviation
#'
#' Deviations of the elements of a vector from their population mean.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The deviations of \code{x}.
#' @export
dev. <- function(x, p) x - E.(x, p)

#' Population-level squared deviation
#'
#' Squared deviations of the elements of a vector from their population mean.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The deviations of \code{x}.
#' @export
dev2. <- function(x, p) dev.(x, p)^2

#' Population-level mean squared deviation
#'
#' Mean squared deviations of the elements of a vector from their population mean.
#'
#' @param x A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The deviations of \code{x}.
#' @export
Edev2. <- function(x, p) E.(dev2.(x, p), p)


#' Population-level R-squared
#'
#' R-squared
#'
#' @param x A numeric vector giving the population.
#' @param y A numeric vector giving the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The R-squared of \code{x} and \code{y}.
#' @export
R2. <- function(x, y, p) cor.(x, y, p)^2

#' Population-level covariance
#'
#' Covariance of two vectors giving the elements of a bivariate
#' population.
#' 
#' @param x A numeric vector giving the first variable of the population.
#' @param y A numeric vector giving the second variable of the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The covariance of \code{x} and \code{y}.
#' @export
cov. <- function(x, y, p){
	if(length(x) != length(y)) stop('x must be same length as y')
	return(E.(x*y, p) - (E.(x, p)*E.(y, p)))
} 

#' Population-level coskewness
#'
#' Coskewness of three vectors giving the elements of a trivariate
#' population.  Note that this is the unstandardized third mixed central
#' moment. 
#' 
#' @param x A numeric vector giving the first variable of the population.
#' @param y A numeric vector giving the second variable of the population.
#' @param z A numeric vector giving the third variable of the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The coskewness of \code{x} and \code{y} and \code{z}.  This 
#'	is \code{E.((x - E.(x))*(y - E.(y))*(z - E.(z)))}
#' @export
coskew. <- function(x, y, z, p){
	if(length(x) != length(y)) stop('x must be same length as y')
	if(length(x) != length(z)) stop('x must be same length as z')
	if(length(y) != length(z)) stop('y must be same length as z')
	return(E.(dev.(x, p)*dev.(y, p)*dev.(z, p), p))
}

#' Population-level skewness
#'
#' Skewness of a vector giving the elements of a population. Note that this
#' is the unstandardized third central moment.
#' 
#' @param x A numeric vector giving the first variable of the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The skewness of \code{x}.  This 
#'  is \code{E.((x - E.(x))^3)}
#' @export
skew. <- function(x, p) E.(dev.(x, p)^3, p)

#' Population-level correlation
#'
#' Correlation of two vectors giving the elements of a bivariate
#' population.
#' 
#' @param x A numeric vector giving the first variable of the population.
#' @param y A numeric vector giving the second variable of the population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The correlation of \code{x} and \code{y}.
#' @export
cor. <- function(x, y, p) cov.(x, y, p)/sqrt(var.(x, p)*var.(y, p))

#' Population-level regression
#'
#' Regression of one vector on another in a bivariate population.
#' 
#' @param x A numeric vector giving the independent variable of 
#'  the population.
#' @param y A numeric vector giving the dependent variable of the 
#'  population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The regression of \code{y} and \code{x}.
#' @export
reg. <- function(y, x, p) cov.(x, y, p)/var.(x, p)

#' Population-level regression intercept
#'
#' Intercept in the regression of one vector on another in a bivariate population.
#' 
#' @param x A numeric vector giving the independent variable of 
#'  the population.
#' @param y A numeric vector giving the dependent variable of the 
#'  population.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The intercept associated with \code{x} and \code{y}.
#' @export
int. <- function(y, x, p) E.(y, p) - reg.(y, x, p)*E.(x, p)

#' General population-level mixed central moment
#'
#' Mixed central moment of an arbitrary number of vectors in a multivariate population.
#' 
#' @param df A data frame of numeric vectors of the same length.
#' @param p An optional vector of weights.  If missing, equal 
#'  weights are used.
#' @return The mixed central moment of the variables in \code{df}.
#' @export
cm. <- function(df, p){
  if(missing(p)) p <- rep(1, length(df[[1]]))
  if(any(length(df[[1]]) != sapply(df, length))) stop('all variables must be same length')
  df <- sweep(df, 2, apply(df, 2, E., p = p), '-')
  return(E.(apply(df, 1, prod), p))
}

#' Expected value of a distribution
#'
#' Calculate or approximate the expected value of a distribution.
#' If \code{support} is missing, it is assumed that the support is
#' continuous and \code{\link{integrate}} is used to approximate
#' the expected value.  If the support is countably infinite, then
#' \code{support} should be chosen to reduce the error associated
#' numerically approximating infinite sums.
#'
#' @param dfunc Function that takes a random variable as its first
#'  argument and returns its probability (if \code{support} in not
#'  missing or probability density (if \code{support} is missing).
#' @param lower Lower limit of the continuous \code{support} of the
#'  distribution (only has an effect if \code{support} is missing).
#' @param upper Upper limit of the continuous \code{support} of the
#'  distribution (only has an effect if \code{support} is missing).
#' @param support Values at which to evaluate a distribution with
#'  discrete support.
#' @param ... Additional arguments to pass to \code{dfunc}.
#' @return The value of the expected value of \code{dfunc} (if
#'  \code{support} is not missing) or an approximation to the
#'  expected value supplied by \code{\link{integrate}} (if
#'  \code{support} is missing).
#' @export
#' @examples
#' Edist(dnorm) # exactly 0
#' Edist(dpois, lambda = 1, support = 0:3) # bad approximation to 1
#' Edist(dpois, lambda = 1, support = 0:100) # (numerically) exactly 1
#' Edist(dbinom, size = 4, prob = 0.51, support = 0:4)
#' Edist(dgamma, lower = 0, shape = 2.3, scale = 0.2)
#' Edist(sin, lower = -1, upper = 1)
Edist <- function(dfunc, lower = -Inf, upper = Inf, support, ...){
  integrand <- function(x.){x. * dfunc(x. , ...)}
  if(missing(support)) out <- integrate(integrand, lower, upper)
  else out <- sum(integrand(support))
  return(out)
}

#' Conditional expectation
#'
#' @param x A numeric vector giving the population
#' @param g An atomic vector on which to condition
#' @param p weights
#' @return The conditional expectation of \code{x}.
#' @rdname cE.
#' @export
#' @examples
#' n <- 100
#' x <- rnorm(n)
#' y <- rnorm(n)
#' g <- rep(letters[1:10], 10)
#'
#' # law of total expectation
#' E.(x)
#' E.(cE.(x, g))
#'
#' # law of total variance
#' var.(x)
#' E.(cvar.(x, g)) + var.(cE.(x, g))
#'
#' # law of total covariance
#' cov.(x, y) 
#' E.(ccov.(x, y, g)) + cov.(cE.(x, g), cE.(y, g))
cE. <- function(x, g, p) {
    if(missing(p)) {
        return(sapply(split(x, g), mean))
    } else {
        return(mapply(E., split(x, g), split(p, g)))
    }
}

#' @rdname cE.
#' @export
cvar. <- function(x, g, p) {
    cE.(x^2, g, p) - (cE.(x, g, p)^2)
}

#' @param y another vector
#' @rdname cE.
#' @export
ccov. <- function(x, y, g, p) {
    cE.(x*y, g, p) - (cE.(x, g, p) * cE.(y, g, p))
}
stevencarlislewalker/popmoments documentation built on May 26, 2017, 7:59 p.m.