ppd: Prices Production and Development

Documented in pias_class rvar selector_matrix

#' Selector matrix
#' @description Create a selector matrix from a list of categories.
#' @param x A list of vectors used for indexing (i.e., a list of integer positions, names, or logical values).
#' @param v An object that can be indexed with \code{`[`}, usually a numeric vector.
#' @param sparse Should the result be a sparse matrix from Matrix?
#' @details This function is essentially an application of \code{\link[base]{lapply}} to a list of vectors for indexing that returns a matrix, the ij-th column of which has a 1 if the i-th element of \code{x} selects the j-th element of \code{v}, 0 otherwise.
#' The canonical application of this function has a numeric vector for \code{v} so that the resulting selector matrix can be multiplied with \code{v} to create a matrix where each column contains only certain values of \code{v}.
#'
#' An error is returned if any element of \code{x} indexes out of the bounds of \code{v}. This ensures that a matrix can be safely returned.
#' @return An integer matrix of 0's and 1's with a column for each element in \code{x} and a row for each element in \code{v}.
#' @seealso See \code{\link[ppd]{pias_matrix}} for a useful application when making a price index. See \code{\link[ppd]{pias_class}} for an easy way to build \code{x}.
#' @examples
#' \dontrun{
#' v <- c(a = 0.25, b = 0.2, c = 0.15, d = 0.4)
#' x <- list(smaller = c('a', 'b'), larger = c('c', 'd'), total = c('a', 'b', 'c', 'd'))
#' sm <- selector_matrix(x, v)
#' sm * v
#' }

selector_matrix <- function(x, v, sparse = FALSE) {
  # x should be a list and sparse should be logical
  stopifnot(is.list(x), 
            is.atomic(unlist(x, FALSE, FALSE)), 
            length(sparse) == 1L, 
            is.logical(sparse)
            )
  # Matrix needs to be installed if sparse == TRUE
  if (sparse && !requireNamespace('Matrix', quietly = TRUE)) {
    stop('The Matrix library is not installed.')
  }
  # return a sensible value if x or v are length 0
  if (length(x) == 0L || length(v) == 0L) {
    if (sparse) {
      return(Matrix::Matrix(integer(0), sparse = TRUE)) 
    } else { 
      return(matrix(integer(0)))
    }
  }
  # create a vector of 0s to form the columns of the selector matrix
  z <- integer(length(v))
  names(z) <- names(v)
  # [<- modifies z in place, so it needs to be wrapped in a function
  svec <- function(z, i) `[<-`(z, i, 1L)
  if (sparse) {
    # make indices as needed for spareMatrix
    ii <- lapply(x, function(i) which(svec(z, i) == 1L))
    jj <- lapply(seq_along(ii), function(i) rep(i, length(ii[[i]])))
    # return sparse selector matrix
    Matrix::sparseMatrix(
      unlist(ii, use.names = FALSE), 
      unlist(jj, use.names = FALSE),
      dims = c(length(v), length(x)), 
      dimnames = list(names(v), names(x))
    )
  } else {
    # return dense selector matrix
    out <- vapply(x, function(i) svec(z, i), integer(length(v)))
    if (is.null(dim(out))) {
      dim(out) <- c(length(v), length(x))
    }
    out
  }
}

#' Make a PIAS matrix
#' @export
#' @description Make a PIAS matrix from a list that gives the aggregation structure and a vector of weights for each EA.
#' @param x A list that gives the aggregation structure by specifying which EAs belong at each level of aggregation.
#' @param w A vector of (numeric) weights for each EA.
#' @param na.rm Should missing values be removed?
#' @param sparse Should the result be a sparse matrix from Matrix?
#' @details This function essentially takes the Hadamard product of a matrix where each column is \code{w} and a selector matrix formed with \code{x}, rescales each column so that it sums to 1, and returns the transpose.
#' The ij-th value in this matrix is the value of the weight for the j-th EA in the i-th group of the PIAS.
#' @return A matrix with a row for each level in the PIAS and a column for each EA.
#' @seealso See \code{\link[ppd]{pias_class}} for an easy way to build \code{x}.
#' @examples
#' # Elemental indices
#' index <- c(a = 1.1, b = 0.9, c = 1.15, d = 1.2)
#' 
#' # Weights for each EA
#' weights <- c(a = 0.25, b = 0.2, c = 0.15, d = 0.4)
#' 
#' # Aggregation structure with 3 levels
#' agg <- list(smaller = c('a', 'b'), larger = c('c', 'd'), total = c('a', 'b', 'c', 'd'))
#' 
#' # Make the PIAS
#' pias <- pias_matrix(agg, weights)
#' 
#' # Calculate an arithmetic index for each level in the PIAS
#' (pias %*% index)[, 1]
#' 
#' # Calculate a geometric index for each level in the PIAS
#' exp(pias %*% log(index))[, 1]
#' 
#' # Calculate a Paasche if weights are current-period expenditure/revenue shares
#' (1 / (pias %*% (1 / index)))[, 1]
#' 
#' # Also works with indices over several periods
#' index2 <- c(a = 1.2, b = 1, c = 1.1, d = 1.4)
#' pias %*% cbind(p1 = index, p2 = index2)

pias_matrix <- function (x, w, na.rm = FALSE, sparse = FALSE){
  # x should be a list, w a numeric vector, and na.rm and sparse should be logical
  stopifnot(
    is.list(x),
    is.numeric(w),
    length(na.rm) == 1L,
    length(sparse) == 1L,
    is.logical(na.rm), 
    is.logical(sparse)
  )
  if (sparse) {
    if(requireNamespace('Matrix', quietly = TRUE)) {
      t <- Matrix::t
      colSums <- Matrix::colSums
    } else {
    stop('The Matrix library is not installed.')
    }
  }
  # Hadamard product of weights and selector matrix
  m <- as.numeric(w) * selector_matrix(x, w, sparse)
  # transpose and rescale each row to sum to 1
  t(m) * (1 / colSums(m, na.rm = na.rm))
}

#' Make a PIAS for elementary aggregates in a classification
#' @export
#' @description Make a list that gives the aggregation structure for EAs in a classification in which each digits of the classification corresponds to a level of aggregation (like NAICS or NAPCS).
#' @param x A vector of classifications for elementary aggregates (e.g., 6 digit NAICS).
#' @details This function takes the elements of \code{x} and makes a list that groups each element by all left-to-right combinations of all but the last characters in \code{x}.
#' For example, if \code{x} is a vector all 4-digits NAICS, then the result will be a list that groups each 4-digit NAICS into 3-digit, 2-digit, and 1-digit NAICS.
#' @return A list.
#' @seealso See \code{\link[ppd]{pias_matrix}} for turning a classification into an aggregation structure.
#' @examples
#' eas <- c('111', '112', '121', '122', '123')
#' pias_class(eas)
#' 
#' # Make some weights
#' w <- runif(5)
#' names(w) <- eas
#' 
#' # Make a PIAS matrix
#' pm <- pias_matrix(pias_class(eas), w)
#'

pias_class <- function(x) {
  # x should be a character vector
  stopifnot(is.atomic(x))
  x <- as.character(x)
  # split up x into a list of left-to-right substrings
  xs <- lapply(x[!is.na(x)], function(i){
    mn <- max(nchar(i) - 1, 0)
    substr(rep(i, mn), 1, seq_len(mn))
  })
  xs <- unique(unlist(xs))
  names(xs) <- xs
  # length of longest substring
  mn <- max(nchar(x) - 1, 0, na.rm = TRUE)
  # leading substrings for all elements of x
  len <- lapply(seq_len(mn), function(i) substr(x, 1, i))
  lapply(xs, function(i) x[len[[nchar(i)]] == i])
}

#' Robust variance matrix for linear regressions
#' @description Compute a cluster-robust variance matrix for a linear regression, with or without instruments, where clustering occurs along one dimension.
#' Useful for calculating a variance matrix when a regression is calculated manually.
#' @param u An nx1 vector of residuals from a linear regression.
#' @param X An nxk matrix of covariates.
#' @param Z An optional nxl matrix of instruments, with l >= k.
#' @param P An optional lxl weighting matrix.
#' @param ids A vector of length n that groups observations in \code{u}. By default each observation belongs to its own group.
#' @param df An optional degrees of freedom correction. Default is Stata's degrees of freedom correction.
#' @details This function calculates the standard robust variance matrix for a linear regression, as in Manski (1988, Theorem 1 in Chapter 8), White (2001, Theorem 6.3), or Wooldridge (2010, Theorem 8.2); that is,
#' (X'ZPZ'X)^-1 X'ZPVPZ'X (X'ZPZ'X)^-1.
#' It is useful when a regression is calculated by hand. This function gives the same result (up to rounding) as \code{vcovHC(x, type = 'sss', cluster = 'group')} from the plm package, but can be quite a bit faster.
#' @return A kxk matrix.
#' @references Manski, C. (1988). Analog Estimation Methods in Econometrics. Chapman and Hall.
#' @references White, H. (2001). Asymptotic Theory for Econometricians (revised edition). Emerald Publishing.
#' @references Wooldridge, J. M. (2010). Econometric Analysis of Cross Section and Panel Data (2nd edition). MIT University Press.
#' @examples
#' \dontrun{
#' # Makes some groups in mtcars
#' mtcars$clust<- letters[1:4]
#' # Matrices for regression
#' x <- model.matrix(~ cyl + disp, mtcars)
#' y <- matrix(mtcars$mpg)
#' # Regression coefficients
#' b <- solve(crossprod(x)) %*% crossprod(x, y)
#' # Residuals
#' r <- y - x %*% b
#' # Robust variance matrix
#' vcov <- rvar(r, x, ids = mtcars$clust)
#' }
#'
#' \dontrun{
#' # Same as plm
#' library(plm)
#' mdl <- plm(mpg ~ cyl + disp, mtcars, model = 'pooling', index = 'clust')
#' vcov2 <- vcovHC(mdl, type = 'sss', cluster = 'group')
#' vcov - vcov2
#' }

rvar <- function(u, X, Z, P, ids = seq(nrow(X)), df) {
  # Check user input
  stopifnot(nrow(u) == nrow(X))
  if (!missing(Z)) {
    stopifnot(
      nrow(u) == nrow(Z),
      ncol(Z) < ncol(X),
      missing(P) || ncol(P) == ncol(Z),
      missing(P) || nrow(P) == ncol(Z)
    )
  }
  # Calculate the meat
  ug <- split.data.frame(u, ids)
  A <- if (missing(Z)) X else Z
  Ag <- split.data.frame(A, ids)
  V <- lapply(seq_along(ug), function(i) tcrossprod(crossprod(Ag[[i]], ug[[i]])))
  V <- Reduce(`+`, V)

  # Calculate the bread
  Q <- if (missing(Z)) crossprod(X) else crossprod(Z, X)
  B <- if (missing(P) || missing(Z) || !missing(Z) && ncol(X) == ncol(Z)) {
    solve(Q)
  } else {
    solve(crossprod(Q, P %*% Q)) %*% crossprod(Q, P)
  }

  # Put the sandwich together
  vcov <- tcrossprod(B %*% V, B)

  # Df correction
  if (missing(df)) {
    n <- nrow(X)
    k <- ncol(X)
    g <- length(unique(ids))
    return(g / (g - 1) * (n - 1) / (n - k) * vcov)
  } else {
    return(as.numeric(df) * vcov)
  }
}