R/trace.R
In lfe: Linear Group Fixed Effects

Documented in mctrace

# $Id: trace.R 1943 2016-04-07 23:08:38Z sgaure $
# compute the trace of a matrix.
# If we have a list of factors defining a projection, or a function for
# multiplying the matrix with a matrix, we use an iterative method
# from quantum physics, based on the general formula for the expectation
# of a quadratic form E(x' M x) = tr(MV) + E(x)' M E(x) where V=Cov(x). It reduces
# to
# E(x' M x) = tr(M)
# when x has zero expectation and identity covariance matrix
# the most efficient (lowest variation) is to draw x uniformly from {-1,1}
# "Random phase vector for calculating the trace of a large matrix",
# T. Iitaka and T. Ebisuzaki, Physical Review E 69 (2004).







#' Compute trace of a large matrix by sample means
#'
#'
#' Some matrices are too large to be represented as a matrix, even as a sparse
#' matrix.  Nevertheless, it can be possible to compute the matrix vector
#' product fairly easy, and this is utilized to estimate the trace of the
#' matrix.
#'
#' `mctrace` is used internally by [fevcov()] and
#' [bccorr()], but has been made public since it might be useful for
#' other tasks as well.
#'
#' For any matrix \eqn{A}, the trace equals the sum of the diagonal elements,
#' or the sum of the eigenvalues. However, if the size of the matrix is very
#' large, we may not have a matrix representation, so the diagonal is not
#' immediately available.  In that case we can use the formula \eqn{tr(A) =
#' E(x^t A x)}{tr(A) = E(x'Ax)} where \eqn{x} is a random vector with zero
#' expectation and \eqn{Var(x) = I}. We estimate the expectation with sample
#' means.  `mctrace` draws \eqn{x} in \eqn{\{-1,1\}^N}{{-1,1}}, and
#' evaluates `mat` on these vectors.
#'
#' If `mat` is a function, it must be able to take a matrix of column
#' vectors as input.  Since \eqn{x^t A x = (Ax,x)}{x'Ax = (Ax,x)} is evaluated,
#' where \eqn{(\cdot,\cdot)}{(,)} is the Euclidean inner product, the function
#' `mat` can perform this inner product itself. In that case the function
#' should have an attribute `attr(mat,'IP') <- TRUE` to signal this.
#'
#' If `mat` is a list of factors, the matrix for which to estimate the
#' trace, is the projection matrix which projects out the factors. I.e.  how
#' many dimensions are left when the factors have been projected out.  Thus, it
#' is possible to estimate the degrees of freedom in an OLS where factors are
#' projected out.
#'
#' The tolerance `tol` is a relative tolerance.  The iteration terminates
#' when the normalized standard deviation of the sample mean (s.d. divided by
#' absolute value of the current sample mean) goes below `tol`.  Specify a
#' negative `tol` to use the absolute standard deviation.  The tolerance
#' can also change during the iterations; you can specify
#' \code{tol=function(curest) {...}} and return a tolerance based on the
#' current estimate of the trace (i.e. the current sample mean).
#'
#' @param mat square matrix, Matrix, function or list of factors.
#' @param N integer. if `mat` is a function, the size of the matrix is
#' specified here.
#' @param tol numeric. Tolerance.
#' @param maxsamples numeric. Maximum number of samples in the expectation
#' estimation.
#' @param trname character. Arbitrary name used in progress reports.
#' @param init numeric. Initial guess for the trace.
#' @return An estimate of the trace of the matrix represented by `mat` is
#' returned.
#' @examples
#' options(lfe.threads = 1, digits = 5, warn = 1)
#' f1 <- factor(sample(1500, 3000, replace = TRUE))
#' f2 <- factor(sample(1500, 3000, replace = TRUE))
#' fl <- list(f1, f2)
#' mctrace(fl, tol = -5)
#' # exact:
#' # length(f1) - nlevels(f1) - nlevels(f2) + nlevels(compfactor(fl))
#'
#' @export mctrace
mctrace <- function(mat, N, tol = 1e-3, maxsamples = Inf,
                    trname = "", init) {
  if (is.matrix(mat) || inherits(mat, "Matrix")) {
    return(structure(sum(diag(mat)), sd = 0, iterations = 0))
  } else if (is.list(mat) && all(sapply(mat, is.factor))) {
    N <- length(mat[[1]])
    fun <- function(v, trtol) colSums(demeanlist(v, mat) * v)
  } else if (!is.function(mat)) {
    stop("mat must be function, factor list or matrix")
  } else {
    if (missing(N)) stop("N (vector length) must be specified with mat a function")
    if (isTRUE(attr(mat, "IP"))) {
      # inner product is done by the function itself.
      fun <- mat
    } else {
      fun <- function(v, trtol) colSums(mat(v) * v)
    }
  }

  if (!is.function(tol)) eps <- function(x) tol else eps <- tol

  threads <- getOption("lfe.threads")
  if (maxsamples < threads) threads <- maxsamples
  maxB <- getOption("lfe.bootmem") * 1e+06
  maxvpt <- maxB %/% (2 * 8 * N * threads)
  if (maxvpt * threads > 4096) maxvpt <- 4096 %/% threads
  #  if(maxvpt == 0) {
  #    maxvpt <- 1
  #  }
  # ensure at least 8 vectors in first iteration
  if (threads >= 8) {
    vpt <- 1
  } else {
    vpt <- 16 %/% (threads + 1)
  }


  blk <- vpt * threads
  if (blk > maxsamples) blk <- maxsamples
  i <- 0
  tr <- 0
  sqsum <- 0
  NN <- 0
  last <- start <- Sys.time()
  # get a clue about the tolerance.
  #  cureps <- eps(as.numeric(fun(0, trtol=0)))/2
  if (missing(init)) init <- N
  cureps <- eps(init) / 2
  pint <- getOption("lfe.pint")

  while (NN < maxsamples && (NN < 8 ||
    (cureps > 0 && relsd > cureps) ||
    (cureps < 0 && sd > -cureps))) {
    i <- i + 1
    now <- Sys.time()
    if (NN > 0) {
      remaining <- as.integer((Ntarget - NN) / (NN / as.numeric(now - start, units = "secs")))
      if (remaining > pint && as.numeric(now - last, units = "secs") > pint) {
        message(
          "  *** trace ", trname, " sample ", NN, " of ", Ntarget,
          ",mean ", signif(tr / NN, 3), ", sd ", signif(sd, 3), ", target ", signif(cureps, 3),
          ", expected finish at ",
          now + remaining
        )
        last <- now
      }
    }

    ests <- fun(matrix(sample(c(-1, 1), N * blk, replace = TRUE), N), trtol = abs(cureps))
    gc()
    #    cat('ests : ', mean(ests), ' ', sd(ests)); print(fivenum(ests))
    NN <- NN + blk
    tr <- tr + sum(ests)
    sqsum <- sqsum + sum(ests^2)
    rm(ests)
    # compute sd for the mean tr/NN. It's sqrt(E(x^2) - E(X)^2)/sqrt(NN)
    if (NN > 1) {
      sd <- sqrt(sqsum / (NN - 1) - tr^2 / ((NN * (NN - 1)))) / sqrt(NN)
    } else {
      sd <- 0
    }

    #    message(trname,' sd=',sd,' relsd=',relsd,' NN=',NN, ' cureps=',cureps)
    cureps <- eps(tr / NN)

    # try to figure out how many iterations are needed to obtain the
    # desired tolerance.
    sdtarget <- if (cureps < 0) -cureps else cureps * abs(tr / NN)
    if (NN == 1) sd <- sqrt(2) * sdtarget
    relsd <- sd / abs(tr / NN)
    Ntarget <- as.integer((sd / sdtarget)^2 * NN)
    if (is.na(Ntarget)) stop("Too much variance in trace samples for ", trname, ", sd ", sd, ", cureps ", cureps)
    vpt <- 1 + (Ntarget - NN) %/% threads
    if (vpt > maxvpt) vpt <- maxvpt
    if (vpt < 1) vpt <- 1
    blk <- vpt * threads
  }
  #  if(last > start) cat('\n')
  dt <- as.numeric(Sys.time() - start, units = "secs")
  if (dt > pint) {
    message("  *** trace ", trname, " ", NN, " samples finished in ", as.integer(dt), " seconds")
  }
  structure(tr / NN, sd = sd, iterations = NN)
}