R/cgsolve.R
In lfe: Linear Group Fixed Effects

Documented in cgsolve

# $Id: cgsolve.R 1990 2016-04-14 13:48:36Z sgaure $
# From "A practical termination criterion for the conjugate gradient method", E.F. Kaasschieter
# BIT 28 (1988), 308-322  (2.6).
# return the vector phi_i(x) for i=1:j

phi <- function(j, x, alpha, beta) {
  if (j == 1) {
    return(1)
  }

  psiprev <- 1
  phiprev <- 1 - alpha[1] * x
  res <- c(1, phiprev)
  if (j == 2) {
    return(res)
  }

  for (i in 3:j) {
    psi <- phiprev + beta[i - 1] * psiprev
    phi <- phiprev - alpha[i] * x * psi
    res <- c(res, phi)
    phiprev <- phi
    psiprev <- psi
  }
  as.numeric(res)
}

newmu <- function(oldmu, i, alpha, beta) {
  if (all(phi(i, oldmu, alpha, beta) > 0)) {
    return(oldmu)
  }
  u <- 1e-3
  y <- 0
  z <- oldmu
  # binary search
  x <- oldmu
  while (z - y > u * y) {
    x <- (y + z) / 2
    if (all(phi(i, x, alpha, beta) > 0)) {
      y <- x
    } else {
      z <- x
    }
  }
  return(x)
}

newmus <- function(oldmus, i, alpha, beta) {
  res <- NULL
  for (j in seq_along(oldmus)) {
    res <- c(res, newmu(oldmus[j], i, alpha[, j], beta[, j]))
  }
  res
}

# stop('debug')
# conjugate gradient solver Ax = b, b may be matrix
# If A is a function, it must be able to take a matrix argument
# Algorithm 3 from Kaasschieter (1988)






#' Solve a symmetric linear system with the conjugate gradient method
#'
#'
#' `cgsolve` uses a conjugate gradient algorithm to solve the linear
#' system \eqn{A x = b} where \eqn{A} is a symmetric matrix.  `cgsolve` is
#' used internally in \pkg{lfe} in the routines [fevcov()] and
#' [bccorr()], but has been made public because it might be useful
#' for other purposes as well.
#'
#'
#' The argument `A` can be a symmetric matrix or a symmetric sparse matrix
#' inheriting from `"Matrix"` of the package \pkg{Matrix}.  It can also be
#' a function which performs the matrix-vector product. If so, the function
#' must be able to take as input a matrix of column vectors.
#'
#' If the matrix `A` is rank deficient, some solution is returned.  If
#' there is no solution, a vector is returned which may or may not be close to
#' a solution.  If `symmtest` is `FALSE`, no check is performed that
#' `A` is symmetric. If not symmetric, `cgsolve` is likely to raise
#' an error about divergence.
#'
#' The tolerance `eps` is a relative tolerance, i.e.  \eqn{||x - x_0|| <
#' \epsilon ||x_0||} where \eqn{x_0} is the true solution and \eqn{x} is the
#' solution returned by `cgsolve`. Use a negative `eps` for absolute
#' tolerance.  The termination criterion for `cgsolve` is the one from
#' \cite{Kaasschieter (1988)}, Algorithm 3.
#'
#' Preconditioning is currently not supported.
#'
#' If `A` is a function, the test for symmetry is performed by drawing two
#' random vectors `x,y`, and testing whether \eqn{|(Ax, y) - (x, Ay)| <
#' 10^{-6} sqrt((||Ax||^2 + ||Ay||^2)/N)}, where \eqn{N} is the vector length.
#' Thus, the test is neither deterministic nor perfect.
#'
#' @param A matrix, Matrix or function.
#' @param b vector or matrix of columns vectors.
#' @param eps numeric. Tolerance.
#' @param init numeric. Initial guess.
#' @param symmtest logical. Should the matrix be tested for symmetry?
#' @param name character. Arbitrary name used in progress reports.
#' @return
#'
#' A solution \eqn{x} of the linear system \eqn{A x = b} is returned.
#' @seealso [kaczmarz()]
#' @references Kaasschieter, E. (1988) \cite{A practical termination criterion
#' for the conjugate gradient method}, BIT Numerical Mathematics,
#' 28(2):308-322.  <https://link.springer.com/article/10.1007/BF01934094>
#' @examples
#'
#' N <- 100000
#' # create some factors
#' f1 <- factor(sample(34000, N, replace = TRUE))
#' f2 <- factor(sample(25000, N, replace = TRUE))
#' # a matrix of dummies, which probably is rank deficient
#' B <- makeDmatrix(list(f1, f2))
#' dim(B)
#' # create a right hand side
#' b <- as.matrix(B %*% rnorm(ncol(B)))
#' # solve B' B x = B' b
#' sol <- cgsolve(crossprod(B), crossprod(B, b), eps = -1e-2)
#' # verify solution
#' sqrt(sum((B %*% sol - b)^2))
#'
#' @export cgsolve
cgsolve <- function(A, b, eps = 1e-3, init = NULL, symmtest = FALSE, name = "") {
  start <- Sys.time()
  precon <- attr(A, "precon")
  oneini <- 0
  if (is.matrix(b) || inherits(b, "Matrix")) N <- nrow(b) else N <- length(b)
  if (is.matrix(A) || inherits(A, "Matrix")) {
    if (symmtest && !isSymmetric(A)) stop("matrix is not symmetric")
    fun <- function(v) as.matrix(A %*% v)
  } else if (is.function(A)) {
    fun <- function(v) as.matrix(A(v))
    if (symmtest) {
      # A symmetric matrix satisfies (Ax,y) = (Ay,x) for every y and x
      x <- as.matrix(runif(N, 0.7, 1.4))
      y <- as.matrix(runif(N, 0.7, 1.4))
      Ax <- fun(x)
      Ay <- fun(y)
      if (abs(sum(Ax * y) - sum(Ay * x)) > 1e-6 * sqrt(mean(Ax^2) + mean(Ay^2))) {
        warning(deparse(substitute(A)), " seems to be non-symmetric")
      }
      rm(x, y, Ax, Ay)
    }
  } else {
    stop("A must be a matrix, Matrix or a function")
  }

  b <- as.matrix(b)
  start <- last <- Sys.time()

  #  if(is.null(init)) init <- matrix(rnorm(nrow(b)*ncol(b)),nrow(b))
  if (is.null(init)) {
    x <- matrix(0, N, ncol(b))
    r <- b
  } else {
    x <- as.matrix(init)
    if (ncol(x) == 1 && ncol(b) > 1) {
      x <- matrix(init, N, ncol(b))
    }
    r <- b - fun(x)
  }
  p <- r
  tim <- 0
  # first iteration outside loop
  oldr2 <- colSums(r^2)
  minr2 <- sqrt(max(oldr2))
  Ap <- fun(p)
  alpha <- oldr2 / colSums(p * Ap)
  mu <- 1 / alpha
  allalpha <- matrix(alpha, 1)

  x <- x + t(alpha * t(p))
  r <- r - t(alpha * t(Ap))
  r2 <- colSums(r^2)

  res <- matrix(0, nrow(x), ncol(x))
  origcol <- 1:ncol(b)
  k <- 0
  kk <- 0
  allbeta <- NULL
  if (length(eps) == 1) eps <- rep(eps, ncol(b))
  negeps <- eps < 0
  eps <- abs(eps)
  eps <- ifelse(negeps, eps, eps / (1 + eps))
  pint <- getOption("lfe.pint")
  while (TRUE) {
    k <- k + 1
    delta <- mu * ifelse(negeps, 1, sqrt(colSums(x^2)))

    now <- Sys.time()
    dt <- as.numeric(now - last, units = "secs")
    if (dt > pint) {
      last <- now
      message(
        date(), " CG iter ", k, " ", name, " target=", signif(min(eps), 3),
        " delta=", signif(max(sqrt(r2) / delta), 3), " vecs=", length(delta), " mu ", signif(min(mu), 3)
      )
    }


    done <- sqrt(r2) < eps * delta | (k >= N)

    if (any(done)) {
      #      message('finished vec ',ilpretty(origcol[done]))
      # remove the finished vectors
      res[, origcol[done]] <- x[, done, drop = FALSE]
      origcol <- origcol[!done]
      eps <- eps[!done]
      negeps <- negeps[!done]
      x <- x[, !done, drop = FALSE]
      if (length(origcol) == 0) break
      p <- p[, !done, drop = FALSE]
      r <- r[, !done, drop = FALSE]
      r2 <- r2[!done]
      oldr2 <- oldr2[!done]
      mu <- mu[!done]
      allalpha <- allalpha[, !done, drop = FALSE]
      allbeta <- allbeta[, !done, drop = FALSE]
      kk <- 0
    }
    r2rms <- sqrt(max(r2))
    minr2 <- min(minr2, r2rms)
    if ((kk > 1000 && r2rms > 100 * minr2) || (k > 100 && r2rms > 10000 * minr2)) {
      warning(
        "cgsolve (", name, ") seems to diverge, iter=", k, ", ||r2||=", r2rms,
        " returning imprecise solutions"
      )
      res[, origcol[seq_len(ncol(x))]] <- x
      return(res)
    }

    kk <- kk + 1
    beta <- r2 / oldr2
    allbeta <- rbind(allbeta, beta)
    # we have created some C-functions to handle some operations.
    # Solely to avoid copying large matrices.
    p <- .Call(C_pdaxpy, r, p, beta)
    #    p <- r + t(beta*t(p))
    Ap <- fun(p)
    #    gc()
    alpha <- r2 / .Call(C_piproduct, Ap, p)
    #    alpha <- r2/colSums(Ap*p)
    #    message(name, ' ', k, ' alpha '); print(alpha)
    allalpha <- rbind(allalpha, alpha)
    x <- .Call(C_pdaxpy, x, p, alpha)
    r <- .Call(C_pdaxpy, r, Ap, -alpha)
    oldr2 <- r2
    r2 <- colSums(r^2)
    mu <- newmus(mu, k + 1, allalpha, allbeta)
  }
  #  message('CG iters:',k)
  now <- Sys.time()
  dt <- as.numeric(now - start, units = "secs")
  if (dt > pint) {
    message("  *** cgsolve(", name, ") finished with ", k, " iters in ", as.integer(dt), " seconds")
  }
  res
}