R/QP.R

Defines functions empty.env PQP.solve

# solve a quadratic programming problem constrained to the probability simplex
# this method uses the KKT relations to identify what appears to be the current feasible/free region
# and then within that region we apply preconditioned conjugate gradient to the equality constrained optimization problem
PQP.solve <- function(G,FLOOR=NULL,p=NULL,lag=NULL,error=.Machine$double.eps,PC="circulant",IG=NULL,MARKOV=NULL,trace=FALSE,...)
{
  silent <- TRUE
  RESIDUAL <- FALSE # residual conjugate gradient (not yet tested)

  if(length(dim(G))==2) { fast <- FALSE } else { fast <- TRUE }
  # are we doing direct matrix calculations or FFT tricks?
  if(!fast)
  {
    n <- dim(G)[1]

    # O(n^2) matrix-vector multiplication
    G.VEC <- function(V,inverse=FALSE) { c(G %*% V) }

    # stuff needed to apply preconditioners
    if(PC=="IID")
    {
      ###############################
      #IID PRECONDITIONER: diagonal + constant matrix
      # pre-conditioner eigen-values
      PC0 <- (G[1,1] + (n-1)*G[1,n])
      PC1 <- (G[1,1] - G[1,n])
      PC.UPDATE <- function()
      {
        PC0 <<- (G[1,1] + (m-1)*G[1,n])
        PC1 <<- (G[1,1] - G[1,n])
      }
    }
    else if(PC=="banded")
    {
      ######################
      # Tri-banded + Constant matrix
      CONS <- G[1,n]
      DIAG <- G[1,1] - CONS
      BAND <- sapply(1:(n-1),function(i) { G[i,i+1] }) - CONS
      PC.UPDATE <- function()
      {
        INDEX <- (1:n)[FREE]
        BAND <<- sapply(1:(m-1), function(i) { G[INDEX[i],INDEX[i+1]] } ) - CONS
      }
    }
    else if(PC=="circulant")
    {
      ###############################
      # PSD CIRCULANT PRECONDITIONER
      # T. Chan's PSD preconditioner for symmetric, almost-Toeplitz matrices
      # working in free space only
      C <- G
      # what diagonal am I on?
      dg <- (row(C)-col(C)) %% n
      # split by diagonal
      C <- split(C,dg)
      # average diagonals -> circulant diagonals
      C <- sapply(C,mean)
      # diagonalize the circulant matrix
      C <- FFT(C)
      # inverse matrix
      C <- 1/Re(C)
      PC.UPDATE <- function()
      {
        # working in free space only
        C <- G[FREE,FREE]
        # what diagonal am I on
        dg <- (row(C)-col(C)) %% m
        # split by diagonal
        C <- split(C,dg)
        # average diagonals - circulant diagonals
        C <- sapply(C,mean)
        # diagonalize the circulant matrix
        C <- FFT(C)
        # inverse matrix
        C <<- 1/Re(C)
      }
    }
    else if(PC=="Toeplitz")
    {
      stop("!fast & Topelitz not yet implemented.")
    }
  } # end !fast PC and %*%
  else ### using FFT tricks ###########################################################
  {
    n <- length(FLOOR) # number of sampled times
    N <- length(G) # numer of grid times
    q <- 1-p

    # stuff needed to apply preconditioners
    if(PC=="IID")
    {
      # # pre-conditioner eigen-values
      G1 <- G[1] ; GN <- G[N]
      # conditioning transformation eigen-values
      PC0 <- (G1 + (n-1)*GN)
      PC1 <- (G1 - GN)
      PC.UPDATE <- function()
      {
        # conditioning transformation eigen-values
        PC0 <<- (G1 + (m-1)*GN)
        PC1 <<- (G1 - GN)
      }
    }
    else if(PC=="banded")
    {
      stop("fast banded not yet implemented")
    }
    else if(PC=="circulant")
    {
      # unmodified copy of 'G' corresponding to lag
      g <- G
      # sparser lags approximating free space
      LAG <- seq(0,last(lag),length.out=n)
      # interpolate over approximate free space
      C <- stats::approx(lag,g,LAG,rule=2)$y
      R <- (0:(n-1))/n
      C <- (1-R)*C + R*c(0,rev(C[-1]))
      C <- FFT(C)
      C <- 1/Re(C)

      PC.UPDATE <- function()
      {
        LAG <- seq(0,last(lag),length.out=m)
        C <- stats::approx(lag,g,LAG,rule=2)$y
        R <- (0:(m-1))/m
        C <- (1-R)*C + R*c(0,rev(C[-1]))
        C <- FFT(C)
        C <<- 1/Re(C)
      }
    }

    # circulant embedding for FFT matrix multiplication
    T.C.embed <- function(M)
    {
      M <- c(M,0,rev(M[-1]))
      # Fourier transform
      M <- FFT(M)
      M <- Re(M)
      return(M)
    }
    G <- T.C.embed(G)

    if(PC=="Toeplitz")
    {
      # separate out the initial discontinuity
      IG0 <- IG[1] - IG[2]
      IG[1] <- IG[2]
      # now the preconditioner is IG0*Id + IG
      IG <- T.C.embed(IG)
    }

    # O(n log n) matrix.vector multiplication
    G.VEC <- function(V,inverse=FALSE)
    {
      # grid embedding
      Vgrid <- array(0,N)
      Vgrid[FLOOR+1] <- q*V
      Vgrid[FLOOR] <- Vgrid[FLOOR] + p*V

      # circulant embedding
      Vgrid <- c(Vgrid,rep(0,N))
      # Fourier transform / diagonalize
      Vgrid <- FFT(Vgrid)

      # matrix multiplication in diagonalized basis
      if(!inverse)
      { Vgrid <- G * Vgrid }
      else
      { Vgrid <- IG * Vgrid }

      # inverse Fourier transform
      Vgrid <- IFFT(Vgrid)
      # Toepltiz part
      Vgrid <- Vgrid[1:N]
      Vgrid <- Re(Vgrid)

      # map back from grid
      V <- q*Vgrid[FLOOR+1]
      V <- V + p*Vgrid[FLOOR]

      return(V)
    }

    if(PC=="Toeplitz")
    {
      # inverse approximation normalization
      IGM <- rep(1,n) # perhaps the most important vector to fix the norm with
      IGM <- (IG0 * IGM) + G.VEC(IGM,inverse=TRUE)
      IGM <- G.VEC(IGM,inverse=FALSE)
      IGM <- abs(IGM)
      IGM <- mean(IGM) # average and quadratic form now
      IGM <- 1/IGM
      PC.UPDATE <- function()
      {
        IGM <- FREE
        IGM <- (IG0 * IGM) + G.VEC(IGM,inverse=TRUE)
        IGM <- G.VEC(IGM,inverse=FALSE)
        IGM <- IGM[FREE]
        IGM <- abs(IGM)
        IGM <- mean(IGM) # average and quadratic form now
        IGM <- 1/IGM
      }
    }

    if(PC=="direct")
    {
      # calculate n*n G matrix
      G <- sapply(1:n,function(i){ V <- rep(0,n) ; V[i] <- 1 ; G.VEC(V) })
      # somehow this ends up missing a sparse number of times
      G <- pmax(G,t(G))

      G.VEC <- function(V,inverse=FALSE) { c(G %*% V) }
    }
  } # end fast PC and %*%

  ########################################################
  ######### DEFINE PRECONDITIONERS #######################
  # preconditioner application code
  if(PC=="IID")
  {
    # 1/sqrt(G_IID) pre-conditioning matrix multiplication
    # this is supposed to be a rough approximation of 1/G
    PC.VEC <- function(V)
    { (1/PC1)*V + (1/PC0-1/PC1)*mean(V[FREE])*FREE }
  }
  else if(PC=="banded")
  {
    # tri-band solver without constant
    BAND.SOLVE <- function(VEC)
    {
      # forward elimination
      B <- rep(BAND[1]/DIAG,m)
      V <- rep(VEC[1]/DIAG,m)
      for(i in 2:(m-1))
      {
        # can precalculate some of this
        A <- DIAG - BAND[i-1]*B[i-1]
        B[i] <- BAND[i]/A
        V[i] <- (VEC[i] - BAND[i-1]*V[i-1])/A
      }
      A <- DIAG - BAND[m-1]*B[m-1]
      V[m] <- (VEC[m] - BAND[m-1]*V[m-1])/A

      # backwards substitution
      for(i in (m-1):1)
      {
        V[i] <- V[i] - B[i]*V[i+1]
      }

      return(V)
    }

    # Woodbury matrix identity to account for constant atop tri-band
    PC.VEC <- function(V)
    {
      ONE <- rep(1,m)
      TBF <- BAND.SOLVE(ONE)
      TBV <- BAND.SOLVE(V[FREE])

      V[FREE] <- TBV - c(ONE %*% TBV)/(1/CONS + c(ONE %*% TBF))*TBF

      return(V)
    }
  } # end banded PC
  else if(PC=="circulant")
  {
    PC.VEC <- function(V)
    {
      W <- V[FREE]
      W <- FFT(W)
      W <- C * W
      W <- IFFT(W)
      W <- Re(W)
      V[FREE] <- W
      return(V)
    }
  }
  else if(PC=="Toeplitz")
  {
    PC.VEC <- function(V)
    {
      V[FREE] <- IGM*( IG0*V[FREE] + G.VEC(V,inverse=TRUE)[FREE] )
      return(V)
    }
  }
  else if(PC=="Markov")
  {
    # This is all exactly the same, fast and slow
    # following the notation of Rybicki (1994)
    PC.R <- exp(-diff(MARKOV$t))
    PC.E <- PC.R/(1-PC.R^2)
    PC.D <- PC.R*PC.E
    PC.D <- 1 + c(0,PC.D) + c(PC.D,0)
    PC.UPDATE <- function()
    {
      PC.R <<- exp(-diff(MARKOV$t[FREE]))
      PC.E <<- PC.R/(1-PC.R^2)
      PC.D <<- PC.R*PC.E
      PC.D <<- 1 + c(0,PC.D) + c(PC.D,0)
    }

    # tri-banded inverse
    MARKOV.SOLVE <- function(V)
    {
      W <- V[FREE]
      W <- PC.D*W - c(PC.E*W[-1],0) - c(0,PC.E*W[-m])
      V[FREE] <- W/MARKOV$VAR
      return(V)
    }

    # use Woodbury matrix identity to add constant term
    PC.VEC <- function(V)
    {
      MSF <- MARKOV.SOLVE(FREE)
      MSV <- MARKOV.SOLVE(V)

      V <- MSV - c(FREE %*% MSV)/(1/MARKOV$ERROR + c(FREE %*% MSF))*MSF

      return(V)
    }
  } # end Markov PC

  # # find the best linear combination of vectors for solution to V=solve(G,1)
  # L.SEARCH <- function(V,G.V)
  # {
  #   k <- length(V)
  #   k.V <- length(G.V)
  #
  #   # populate remainder of G.V if necessary
  #   if(k.V < k)
  #   {
  #     for(i in (k.V+1):k)
  #     {
  #       G.V[[i]] <- G.VEC(V[[i]])
  #       G.V[[i]][ACTIVE] <- 0
  #     }
  #   }
  #
  #   # matrix of inner productions WRT G
  #   M <- array(0,c(k,k))
  #   for(i in 1:k)
  #   {
  #     # fill upper triangle
  #     for(j in i:k)
  #     { M[i,j] <- c(V[[i]] %*% G.V[[j]]) }
  #     # copy lower triangle
  #     for(j in 1:i)
  #     { M[j,i] <- M[i,j] }
  #   }
  #
  #   # vector of inner products WRT 1
  #   B <- sapply(V,sum)
  #
  #   # safe solver in case of colinearity
  #   A <- PDsolve(M,pseudo=TRUE) %*% B
  #
  #   V <- lapply(1:k,function(i) { A[i] * V[[i]] })
  #   G.V <- lapply(1:k,function(i) { A[i] * G.V[[i]] })
  #
  #   V <- Reduce('+',V)
  #   G.V <- Reduce('+',G.V)
  #
  #   W <- sum(V)
  #   V <- V/W
  #   G.V <- G.V/W
  #
  #   return(list(V=V,G.V=G.V))
  # }

  #####################
  # CHECK KKT CONDITIONS TO FIX ACTIVE/FREE DIMENSIONS
  ACTIVE <- rep(FALSE,n) # current active constraints
  FREE <- !ACTIVE # current feasible dimensions
  m <- n # number of free dimensions
  CHANGE <- TRUE # did our constraints change?, then keep working
  MISE <- rep(Inf,3) # last 3 MISE objectives - the 3>2 is arbitrary
  LAMBDA <- rep(1/n,3) # last 3 normalization constants
  KKT <- function()
  {
    # we're going to toss the negative probabilities out of the feasible region
    ZERO <- (P<=0)
    P[ZERO] <<- 0

    # update past esitmates (un-normalized)
    P.OLD <<- rbind(P,P.OLD[1:2,])

    # normalization constant
    LAMBDA[2:3] <<- LAMBDA[1:2] # update past lambdas
    LAMBDA[1] <<- 1/sum(P) # the current Lagrange multiplier

    G.P <<- G.VEC(P)

    # update MISE objective
    MISE[2:3] <<- MISE[1:2]
    MISE[1] <<- LAMBDA[1]^2 * c(P %*% G.P)

    # optimization on the probability simplex determines the active constraints from KKT equations
    GRAD <- 1 - G.P # -GRAD/LAMBDA, where GRAD == slack at solution (KKT)

    # currently active inequality constraints
    ACTIVE -> OLD
    ACTIVE <<- ZERO & (GRAD <= 0) # in these dimensions, the gradient is trying to push through the boundary, creating slack (KKT)
    CHANGE <<- any(ACTIVE-OLD) # did our active constraints change?
    FREE <<- !ACTIVE # free space to work in
    m <<- sum(FREE) # size of current free space

    return(NULL)
  }

  ############################
  # INITIALIZE SOLUTION
  # use previous solution
  # if(TRUE)
  if(!length(ls(envir=PQP.env)) || get("EMPTY",pos=PQP.env))
  {
    # not yet a normalized probability
    P <- rep(1,n)

    # approximate solve(G,1) consistent with preconditioner
    if(PC!="direct")
    { P <- PC.VEC(P) }
    else # might as well do something
    { P[FREE] <- qr.solve(G[FREE,FREE],rep(1,m),tol=.Machine$double.eps) }

    P <- clamp(P,0,Inf)
    P <- P/sum(P)
  }
  else # use preconditioner solution
  {
    P <- get("P",pos=PQP.env)
  }
  G.P <- P # initializing variable with junk
  P.OLD <- rbind(P,P,P) # more junk x3
  # check KKT conditions, incase we have a non-trivial initial guess
  KKT() # this returns a number now... does that matter in R?
  # as a side effect this also evaluates G.P, which we need

  CHANGE <- TRUE
  STEPS <- 0
  CHANGES <- 0
  while(CHANGE)
  {
    CHANGES <- CHANGES + 1

    # DIRECT SOLVER #############
    if(PC=="direct")
    {
      STEPS <- STEPS + 1
      P[FREE] <- qr.solve(G[FREE,FREE],rep(1,m),tol=.Machine$double.eps)
    }
    else # CONJUGATE GRADIENT SOLVER ########################
    {
      PC.UPDATE()
      G.P[ACTIVE] <- 0
      P[ACTIVE] <- 0
      SP <- sum(P)
      P <- P/SP
      G.P <- G.P/SP

      # STEEPEST DESCENT INITIALIZATION INTO CONJUGATE GRADIENT ON !CHANGE
      # optimization in the free space to actually solve for Q=solve(G_FREE,1_FREE) proportional to solution by P=LAMBDA*Q
      RES <- FREE - G.P # (r) this is -GRAD in the conditioned free space, using conjugate-gradient notation of residual
      Z <- PC.VEC(RES) # (z) preconditioned conjugate gradient variable
      CONJ <- Z # (p) this is often called 'p', but that's confusing because we are working with probabilities
      # optimize step length in free space assumption

      # CONJUGATE GRADIENT (STEPEST DESCENT ON FIRST STEP)
      ERROR <- ERROR.OLD <- Inf
      CG.STEPS <- 0
      RESTARTED <- FALSE
      while(ERROR > error && CG.STEPS < n)
      {
        # optimize step length in free space assumption
        G.CONJ <- G.VEC(CONJ) # (A.p)
        G.CONJ[ACTIVE] <- 0

        Z.RES <- c(Z %*% RES) # (z.r)
        if(!RESIDUAL) # regular conjugate gradient
        {
          C.G.C <- c(CONJ %*% G.CONJ) # (p.A.p)
          if(abs(C.G.C)<=.Machine$double.eps) { break }

          A <- Z.RES / C.G.C # (alpha)
        }
        else # residual conjugate gradient
        {
          PC.G.C <- PC.VEC(G.CONJ) # (PC.A.p)
          C.G.PC.G.C <- c(G.CONJ %*% PC.G.C) # (p.A.PC.A.p)
          if(abs(C.G.PC.G.C)<=.Machine$double.eps) { break }

          if(CG.STEPS==0)
          {
            G.Z <- G.VEC(Z) # (A.z)
            Z.G.Z <- c(Z %*% G.Z) # (z.A.z)
          }

          A <- Z.G.Z / C.G.PC.G.C
        }

        dP <- A*CONJ # (dx) need this for stopping condition

        # make sure corrections will get smaller
        ERROR.OLD <- ERROR
        ERROR <- sqrt(sum(dP^2))

        # make sure Lagrangian/objective will decrease
        dL <- c((P+dP/2) %*% G.CONJ)*A - sum(dP)

        if(ERROR>=ERROR.OLD || dL>=0)
        {
          # you get one restart
          if(RESTARTED || !CG.STEPS) { break }

          G.P <- G.VEC(P)
          G.P[ACTIVE] <- 0
          RES <- FREE - G.P # (r)
          Z <- PC.VEC(RES) # (z)
          CONJ <- Z # (p)

          RESTARTED <- TRUE
          ERROR <- ERROR.OLD
          next
        }

        P <- P + dP # (x)

        dRES <- -A*G.CONJ # (dr) need this for Polak-Ribiere formula
        RES <- RES + dRES # (r)
        Z <- PC.VEC(RES) # (z)

        if(!RESIDUAL) # regular conjugate gradient
        {
          Z.dRES <- c(Z %*% dRES) # (z.dr) Polak-Ribiere formula
          Z.dRES <- max(0,Z.dRES) # reset to steepest descent if necessary
          if(abs(Z.RES)<=.Machine$double.eps) { break } # ?
          B <- Z.dRES / Z.RES # (beta)
        }
        else # residual conjugate gradient
        {
          B <- Z.G.Z # temp storage

          G.Z <- G.VEC(Z) # (A.z)
          Z.G.Z <- c(Z %*% G.Z) # (z.A.z)

          B <- Z.G.Z/B
        }
        CONJ <- Z + B*CONJ # (p)

        CG.STEPS <- CG.STEPS + 1
      }
      STEPS <- STEPS + CG.STEPS
    }

    # CHECK KKT CONDITIONS AND RESET ACTIVE/FREE SPACE
    KKT()

    # make sure the MISE is decreasing
    # make sure we are not stuck in a rare numerical-indiscrimination / solution-boundary loop
    if(all(MISE[1] >= MISE[2:3])) { break }
  }

  # select best solution of past few (in case of rare boundary loop)
  MIN <- which.min(MISE)
  MISE <- MISE[MIN]
  P <- P.OLD[MIN,]

  # STORE SOLUTION FOR LATER OPTIM EVALUATIONS
  assign("P",P,pos=PQP.env)
  assign("EMPTY",FALSE,pos=PQP.env) # SET TO FALSE TO ACTIVATE MEMORY !!!!!!!!!!!!!!!!!!!!!!!!!!

  P <- LAMBDA[MIN] * P

  RETURN <- list(P=P,MISE=MISE,STEPS=STEPS,CHANGES=CHANGES)
  return(RETURN)
}

# store solutions to pass between optimize evaluations
PQP.env <- new.env()

# empty out this environment when we are done
empty.env <- function(ENV)
{
  STUFF <- ls(ENV)
  rm(list=STUFF,pos=ENV)
  assign("EMPTY",TRUE,pos=ENV)
}
ctmm-initiative/ctmm documentation built on April 18, 2024, 9:39 a.m.