R/owenEL.R

# Self-concordant empirical likelihood for a vector mean,
# as described in:
#
# @article{owen:2013,
#  title={Self-concordance for empirical likelihood},
#  author={Owen, A. B.},
#  journal={Canadian Journal of Statistics},
#  volume={41},
#  number={3},
#  pages={387--397},
#  year={2013}
# }

# This file provides one callable function:
#   emplik_concord   does one EL calculation for a hypothesized mean.
# (Owen's original code also defines tracelr, which calls the EL routine on a
#  trajectory of means from mu0 to mu1 in N+1 steps; it is not included here.)

# emplik_concord returns a list with components:
#  logelr       log empirical likelihood ratio
#  logelr_stat  -2 * logelr, the usual test statistic
#  lambda       Lagrange multiplier (vector of length d)
#  wts          observation weights/probabilities (vector of length n)
#  converged    TRUE if the algorithm converged
#               FALSE usually means that the hypothesized mean is not in the
#               convex hull of the data; you then get a very small likelihood
#               (instead of zero)
#  nits         number of Newton iterations taken
#  ndec         Newton decrement (see Boyd & Vandenberghe)
#  gradnorm     norm of the gradient of the log empirical likelihood

# There are also two internal functions:
#   mllog     a family of self-concordant alternatives to -log() and its
#             first two derivatives.
#   svdlm     least squares regression via the SVD.

# These lengthen the file, but keeping them internal keeps them
# out of the user's namespace.

# Art B. Owen, Feb 2014

#' Use the dual formulation to solve the inner loop.
#' 
#' @param z a matrix of the score equations with observations in rows and variables in columns.
#' @param lam a vector of initial values for the dual parameter.
#' @param eps lower cutoff for logarithm approximation.
#' @param M upper cutoff for logarithm approximation.
#' @param thresh convergence threshold.
#' @param itermax maximum number of iterations.
#' @param verbose print console output.
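#' @return A list with components \code{logelr}, \code{logelr_stat} (\code{-2 * logelr}),
#'   \code{lambda}, \code{wts}, \code{converged}, \code{nits}, \code{ndec}, and \code{gradnorm}.
#' @examples
#' # Minimal usage sketch with illustrative data: test whether the mean of x is mu0.
#' # The input z is just the data centered at the hypothesized mean.
#' set.seed(1)
#' x   <- matrix(rnorm(200), ncol = 2)
#' mu0 <- c(0, 0)
#' fit <- emplik_concord(sweep(x, 2, mu0))
#' pchisq(fit$logelr_stat, df = ncol(x), lower.tail = FALSE)  # asymptotic p-value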
#' @export
emplik_concord = function(  z,  # matrix with one data vector per row; a column vector is ok when d=1
                    lam,  # starting lambda, default (0 ... 0)
                    eps = 1 / nrow(z),  # lower cutoff for -log( ), default 1/nrow(z)
                    M = Inf,  # upper cutoff for -log( ), default Inf
                    thresh=1e-16,  # convergence threshold for log likelihood (default is aggressive)
                    itermax=150,  # upper bound on number of Newton steps (seems ample)
                    verbose=FALSE  # controls printed output
                    )
{ 
  # Empirical likelihood test for whether the mean of (the rows of) z is zero.
  # Pass z already centered at the hypothesized mean mu.
  
  # Backtracking line search parameters [Tweak only with extreme caution.] 
  # See Boyd and Vandenberghe, pp464-466.
  ALPHA = 0.3  # seems better than 0.01 on some 2d test data (sometimes fewer iters)
  BETA  = 0.8
  # We need  0 < ALPHA < 1/2   and 0 < BETA < 1
  
  # Backtrack threshold: you can miss by this much.
  BACKEPS = 1e-10
  # The small positive value guards against backtracking failures caused
  # purely by roundoff; set BACKEPS = 0 for a strict Armijo condition.
  
  if( is.vector(z) )
    z = matrix(z,ncol=1)
  
  n = nrow(z)
  d = ncol(z)
  
  # mu handling from Owen's original code, disabled here: z is assumed pre-centered.
  #if( missing(mu)  )
    #mu  = rep(0,d)
  #z = t( t(z)-mu ) # subtract mu from each z[i,]
  
  #
  # Use lam = 0 or initial lam, whichever is best
  #
  
  init0 = mllog( rep(1,n), eps=eps, M=M, der=2 ) # i.e. lam = 0
  
  if( missing(lam) ){
    init = init0
    lam  = rep(0,d)
  }else{
    init = mllog( 1+z%*%lam, eps=eps, M=M, der=2 )
    if( sum(init0[,1]) < sum(init[,1]) ){
      lam = rep(0,d)
      init = init0
    }
  }
  
  # Initial criterion value f and gradient g
  fold = sum(init[,1])
  gold = apply( z * init[,2],2,sum )
  
  converged = FALSE
  iter      = 0
  oldvals   = init
  while( !converged ){
    iter = iter + 1
    
    # Get Newton Step
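    # The Hessian of the criterion is t(z) %*% diag(fpp) %*% z and the gradient
    # is t(z) %*% fp, so the Newton direction is the least squares solution of a
    # regression with design diag(sqrt(fpp)) %*% z and response fp / sqrt(fpp).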
    rootllpp = sqrt(oldvals[,3])  # sqrt of 2nd derivative of the -log likelihood terms
    zt = z
    for( j in 1:d )
      zt[,j] = zt[,j] * rootllpp
    yt   = oldvals[,2] / rootllpp
    step = -svdlm(zt,yt)  #  more reliable than step = -lm( yt~zt-1 )$coef
    
    backtrack = FALSE 
    s = 1   # usually called t, but R uses t for transpose
    while( !backtrack ){
      newvals = mllog( 1+z%*%(lam+s*step),eps=eps,M=M,der=2 )
      fnew    = sum(newvals[,1])
      targ    = fold + ALPHA * s * sum( gold*step ) + BACKEPS # (BACKEPS for roundoff, should not be needed)
      if(  fnew <= targ ){
        # backtracking has converged
        backtrack = TRUE
        oldvals   = newvals
        fold      = fnew
        gold      = apply( z * oldvals[,2],2,sum )
        # take the step
        lam       = lam + s*step
      }else{
        s = s * BETA
      }
    }
    
    # Newton decrement and gradient norm
    ndec     = sqrt( sum( (step*gold)^2 ) )
    gradnorm = sqrt( sum(gold^2))
    
    if(verbose)print(c(fold,gradnorm,ndec,lam))
    
    converged = ( ndec^2 <= thresh)
    if( iter > itermax )break
  }
  
  # The optimal EL weights are w_i = 1 / (n * (1 + lambda' z_i)), and
  # logelr = sum_i log(n * w_i) = -sum_i log(1 + lambda' z_i);
  # logelr_stat (returned below) is -2 * logelr, the usual test statistic.
  wts    = (1/n)/(1+z%*%lam)
  logelr = sum( mllog(1+z%*%lam,eps=eps,M=M,der=0) )
  
  list(logelr=logelr, logelr_stat = -2*logelr, lambda=lam, wts=wts,
       converged=converged,nits=iter,ndec=ndec,gradnorm=gradnorm)
}


# Internal function mllog, modified -log( ) with derivatives

mllog = function( x, eps = 1 / length(x), M = Inf, der=0 ){
  # minus log and its first der derivatives, on  eps < x < M
  # 4th order Taylor approx to left of eps and right of M
  # der = 0 or 1 or 2
  # 4th order is lowest that gives self concordance
  
  if( eps>M )
    stop("Thresholds out of order")
  
  lo = x < eps
  hi = x > M
  md = (!lo) & (!hi)
  
  # Coefficients for 4th order Taylor approx below eps
  coefs      = rep(0,5)
  coefs[1]   = -log(eps)
  coefs[2:5] = (-eps)^-(1:4)/(1:4)
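  # (The k-th derivative of -log(x) at eps is (-1)^k (k-1)! / eps^k, so the
  #  coefficient of (x - eps)^k in the Taylor expansion is (-eps)^(-k) / k.)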
  
  # Coefficients for 4th order Taylor approx above M
  Coefs      = rep(0,5)
  Coefs[1]   = -log(M)
  Coefs[2:5] = (-M)^-(1:4)/(1:4)
  
  # function value
  f      = x*0
  f[lo]  = h( x[lo]-eps, coefs )
  f[hi]  = h( x[hi]-M,   Coefs )
  f[md]  = -log(x[md])
  
  if( der<1 )return(cbind(f))
  
  # first derivative
  fp     = x*0
  fp[lo] = hp( x[lo]-eps, eps )
  fp[hi] = hp( x[hi]-M, M )
  fp[md] = -1/x[md]
  
  if( der<2 )return(cbind(f,fp))
  
  # second derivative
  fpp     = x*0
  fpp[lo] = hpp( x[lo]-eps, eps )
  fpp[hi] = hpp( x[hi]-M, M )
  fpp[md] = 1/x[md]^2
  
  return( cbind(f,fp,fpp) )
  # End of mllog()
} 
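
# Informal check (illustrative only, not run as package code): on eps < x < M,
# mllog(x) agrees with -log(x) exactly, while outside that range it follows the
# quartic Taylor expansion, so the criterion stays finite even when some of the
# values 1 + z %*% lam are nonpositive. For example:
#   x <- c(-0.5, 0.001, 0.5, 2)
#   cbind(-log(x), mllog(x, eps = 0.01, der = 2))  # finite f, fp, fpp in every row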

#####internal mllog functions
# degree 4 polynomial approx to log
h = function(y,cvals){ # cvals are coefs at eps, Coefs at M
  # sum c[t+1] y^t
  tee = 1:4
  ans = y*0
  ans = ans + cvals[1]
  for( j in tee )
    ans = ans + y^j*cvals[j+1]
  ans
}

# first derivative of h at y, from approx at pt
hp = function(y,pt){
  tee = 0:3
  ans = y*0
  for( j in tee )
    ans = ans + (-y/pt)^j
  ans = ans * (-pt)^-1
  ans
}

# second derivative of h at y, from approx at pt
hpp = function(y,pt){
  tee = 0:2
  ans = y*0
  for( j in tee )
    ans = ans + (j+1) * (-y/pt)^j
  ans = ans *(-pt)^-2 
  ans
}
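
# Differentiating h(y) = -log(pt) + sum_{k=1}^{4} (-pt)^(-k) y^k / k gives
#   h'(y)  = sum_{k=1}^{4} (-pt)^(-k) y^(k-1)        = (-pt)^(-1) * sum_{j=0}^{3} (-y/pt)^j
#   h''(y) = sum_{k=2}^{4} (k-1) (-pt)^(-k) y^(k-2)  = (-pt)^(-2) * sum_{j=0}^{2} (j+1) (-y/pt)^j
# which is exactly what hp() and hpp() compute.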
#####

# Internal function to do a linear model via SVD
# Empirical likelihood's Newton steps are of
# least squares type.

svdlm = function(X,y){
  # Linear model regression coefficient via SVD
  
  # Tolerances for generalized inverse via SVD
  RELTOL = 1e-9     
  ABSTOL = 1e-100
  
  # Get Xplus = generalized inverse of X
  # If svd algorithm failures are encountered
  # it sometimes helps to try svd(t(X)) and
  # translate back. First check to ensure that
  # X does not contain NaN or Inf or -Inf.
  svdX     = svd(X)
  d        = svdX$d
  lo       = d < (RELTOL * max(d) + ABSTOL)
  dinv     = 1/d
  dinv[lo] = 0
  Xplus    = svdX$v %*% diag(dinv,nrow=length(dinv)) %*% t(svdX$u)
  # taking care with diag when dinv is 1x1
  # to avoid getting the identity matrix of
  # size floor(dinv)
  
  # Return X^+ y
  Xplus %*% matrix(y,ncol=1)
}
# end of svdlm
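
# Informal check (illustrative only, not run as package code): for a full-rank X,
# svdlm(X, y) agrees with the ordinary least squares coefficients, e.g.
#   X <- matrix(rnorm(30), ncol = 3); y <- rnorm(10)
#   max(abs(svdlm(X, y) - qr.solve(X, y)))   # should be near machine precision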