Simulated Annealing / Genetic Algorithm.
library(saga)
As an initial test function, we will use a nonlinear regression function that depends on 8 different parameters: an exponential decay plus two Gaussian peaks. This function is taken from the NIST website.
betaValues <- c(9.8940368970E+01, 1.0945879335E-02, 1.0069553078E+02,
                1.1163619459E+02, 2.3300500029E+01, 7.3705031418E+01,
                1.4776164251E+02, 1.9668221230E+01)
betaSD <- c(5.3005192833E-01, 1.2554058911E-04, 8.1256587317E-01,
            3.5317859757E-01, 3.6584783023E-01, 1.2091239082E+00,
            4.0488183351E-01, 3.7806634336E-01)

# exponential decay plus two Gaussian peaks
fx <- function(x, beta){
  beta[1] * exp(-1 * beta[2] * x) +
    beta[3] * exp(-1 * (x - beta[4])^2 / beta[5]^2) +
    beta[6] * exp(-1 * (x - beta[7])^2 / beta[8]^2)
}

x <- seq(1, 250, 1)
y <- fx(x, betaValues)
plot(x, y)
Our evaluation criterion is how different a new set of y values, computed from our candidate beta values, is from the original set. Note that this energy function is specific to our current problem.
energyFunc <- function(newBeta){
  orgBeta <- c(9.8940368970E+01, 1.0945879335E-02, 1.0069553078E+02,
               1.1163619459E+02, 2.3300500029E+01, 7.3705031418E+01,
               1.4776164251E+02, 1.9668221230E+01)
  fx <- function(x, beta){
    beta[1] * exp(-1 * beta[2] * x) +
      beta[3] * exp(-1 * (x - beta[4])^2 / beta[5]^2) +
      beta[6] * exp(-1 * (x - beta[7])^2 / beta[8]^2)
  }
  x <- seq(1, 250, 1)
  orgY <- fx(x, orgBeta)
  newY <- fx(x, newBeta)
  # energy is the sum of squared differences between the two curves
  return(sum((orgY - newY)^2))
}
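As a quick sanity check, the energy at the true parameter values should be exactly zero:

energyFunc(betaValues)
# [1] 0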
Initial values for each beta will be drawn uniformly from the range 0 to 200. Neighbor values will be centered on the current values, with the standard deviation scaled by alpha (note that this particular neighbor function accepts the current temperature but does not actually use it).
neighborFunction <- function(currentPopulation, currentTemperature, alpha){
  # the jump size is scaled by alpha; currentTemperature is part of the
  # expected interface but is not used by this neighbor function
  useSD <- alpha * currentPopulation
  useSD[(useSD < 0.05)] <- 0.05
  newPopulation <- rnorm(length(currentPopulation), currentPopulation, useSD)
  # keep parameters positive by reverting any non-positive draws
  newNegative <- newPopulation <= 0
  newPopulation[newNegative] <- currentPopulation[newNegative]
  return(newPopulation)
}
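A single call shows the behavior: each parameter is perturbed with a standard deviation of roughly alpha times its current value. The draws are random, and the temperature and alpha below are purely illustrative.

neighborFunction(betaValues, currentTemperature=300, alpha=0.1)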
Let's generate a bunch of random solutions to see what temperature we need to reasonably be able to accept higher-energy solutions while we search the space.
generateSolution <- function(){
  newSolution <- runif(8, 0, 200)
  return(newSolution)
}

randomSolutions <- replicate(50, generateSolution())
randomEnergy <- apply(randomSolutions, 2, energyFunc)
abs(diff(randomEnergy))
Our largest values are on the order of 1e7, so our initial temperature should probably be on the order of 1e8.
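To see why, here is a minimal sketch assuming the usual Metropolis acceptance rule, exp(-deltaEnergy / temperature); this is an assumption about what sa() does internally, used only to motivate the scale of the temperature, and acceptProb is a hypothetical helper rather than part of the package.

# hypothetical helper illustrating the standard Metropolis criterion;
# the actual acceptance rule inside sa() may differ
acceptProb <- function(deltaEnergy, temperature){
  exp(-deltaEnergy / temperature)
}
acceptProb(1e7, 1e8)  # ~0.90, uphill moves usually accepted
acceptProb(1e7, 1e6)  # ~4.5e-5, uphill moves essentially never accepted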
initialSolution <- runif(8, 0, 200)
initialSolution[2] <- runif(1, 0, 1)
initTemp <- 1e6
outSA <- sa(initialSolution, energyFunc, neighborFunction, initTemp,
            deltaTemperature=10, alpha=1, nTry=1e5)
library(snowfall)
sfInit(parallel=TRUE, cpus=4)
sfLibrary(saga)
sfExport("tmpNeighbor", "energyFunc")

varyInitPop <- lapply(seq(1, 50), function(x){
  rnorm(8, mean=betaValues, sd=0.2*betaValues)
})

outRes <- sfLapply(varyInitPop, function(x){
  sa(x, energyFunc, tmpNeighbor, 300, 0.005, alpha=0.01, nTry=Inf)
})
sfStop()

save(outRes, file="inst/data/sa50Test.RData")
bestEnergies <- sapply(outRes, function(x){x$bestEnergy})
plot(bestEnergies)

bestSol <- lapply(outRes, function(x){x$bestSolution})
bestSol <- do.call(rbind, bestSol)
betaValues
bestSol

plot(log10(bestEnergies))
So the above tests are too biased; we start too close to the real answer. Let's try the built-in optimization methods in R and see if they actually work, namely the SANN and Nelder-Mead methods of optim.
library(stats)
initGuess <- runif(8, 0, 200)
initGuess[2] <- runif(1)
optNM <- optim(initGuess, energyFunc, method="Nelder-Mead",
               control=list(trace=1, maxit=1e6))
library(stats)
initGuess <- runif(8, 0, 200)
initGuess[2] <- runif(1)
optSA <- optim(initGuess, energyFunc, method="SANN",
               control=list(temp=300, maxit=1e8))
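A quick way to compare the two runs against the true parameters (the actual numbers will vary with the random starting guesses):

rbind(true = betaValues, nelderMead = optNM$par, sann = optSA$par)
c(nelderMead = optNM$value, sann = optSA$value)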
Because numerical methods texts originally used SA for our types of problems by combining it with a simplex, I want to understand the Nelder-Mead downhill simplex search.
We will test it using a rather trivial example: minimizing a simple quadratic function of two variables.
evalFunc <- function(xyVals){
  x <- xyVals[1]
  y <- xyVals[2]
  outVal <- x^2 - 4*x + y^2 - y - x*y
  return(outVal)
}
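Because the function is quadratic, we can work out where the minimum should be: setting the partial derivatives to zero gives 2x - 4 - y = 0 and 2y - 1 - x = 0, i.e. x = 3, y = 2.

evalFunc(c(3, 2))
# [1] -7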
usePoints <- list(c(0, 0), c(1.2, 0), c(0.0, 0.8))
We evaluate the points!
evalPoints <- sapply(usePoints, evalFunc)
And sort them from best to worst!
sortEval <- order(evalPoints, decreasing=FALSE)
evalPoints <- evalPoints[sortEval]
usePoints <- usePoints[sortEval]
names(usePoints) <- c("b", "g", "w")  # best, good, worst
Calculate the mid-point of the best two points:
calcMid <- function(inPoints, nDim){
  # each column of the matrix is one point, so rowMeans gives the centroid
  allPoints <- matrix(unlist(inPoints), ncol=nDim, byrow=FALSE)
  midPoint <- rowMeans(allPoints)
  return(midPoint)
}
midPoint <- calcMid(usePoints[1:2], nDim=2)
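For our sorted simplex this is the centroid of the best two points, (1.2, 0) and (0, 0.8):

midPoint
# [1] 0.6 0.4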
Generate a new reflected, expanded, or contracted point (which is dependent on the coefficient passed in).
calcNewPoint <- function(inMid, inWorst, useCoefficient=1){
  # a coefficient of 1 reflects, 2 expands, -0.5 contracts toward the worst
  newPoint <- inMid + useCoefficient * (inMid - inWorst)
  names(newPoint) <- NULL
  return(newPoint)
}

scalePoints <- function(inPoint, bestPoint, useCoefficient=0.5){
  # shrink a point toward the best point of the simplex
  return(bestPoint + useCoefficient * (inPoint - bestPoint))
}
Reflect our current simplex.
refPoint <- calcNewPoint(midPoint, usePoints[[3]])
evalRef <- evalFunc(refPoint)
evalRef
evalPoints[1]
Our new point has a better energy than the best point. We should try expanding it!
expPoint <- calcNewPoint(midPoint, usePoints[[3]], 2)
evalExp <- evalFunc(expPoint)
evalExp
evalPoints[1]
OK, even better. Let's swap it into the simplex in place of the worst point (Nelder-Mead always replaces the worst vertex).

usePoints[[3]] <- expPoint
This function expects a list of points and a function to evaluate each point, and it returns a new simplex.
simplexLogic <- function(inPoints, evalFunction){
  nDim <- length(inPoints) - 1
  # order the points from best to worst
  inEval <- sapply(inPoints, evalFunction)
  newOrder <- order(inEval, decreasing=FALSE)
  inEval <- inEval[newOrder]
  inPoints <- inPoints[newOrder]
  outPoints <- inPoints
  midPoint <- calcMid(inPoints[1:nDim], nDim)

  # try reflecting the worst point through the centroid
  refPoint <- calcNewPoint(midPoint, inPoints[[nDim+1]], 1)
  refEval <- evalFunction(refPoint)

  if (refEval < inEval[nDim]){
    if (refEval >= inEval[1]){
      # between the best and second-worst: accept the reflection
      outPoints[[nDim+1]] <- refPoint
    } else {
      # better than the current best: try expanding further
      expPoint <- calcNewPoint(midPoint, inPoints[[nDim+1]], 2)
      expEval <- evalFunction(expPoint)
      if (expEval < inEval[1]){
        outPoints[[nDim+1]] <- expPoint
      } else {
        outPoints[[nDim+1]] <- refPoint
      }
    }
  } else {
    # reflection was no better than the second-worst: try contraction
    contPoint <- calcNewPoint(midPoint, inPoints[[nDim+1]], -0.5)
    contEval <- evalFunction(contPoint)
    if (contEval < inEval[nDim+1]){
      outPoints[[nDim+1]] <- contPoint
    } else if (refEval < inEval[nDim+1]){
      outPoints[[nDim+1]] <- refPoint
    } else {
      # nothing improved on the worst point: shrink everything toward the best
      outPoints[2:(nDim+1)] <- lapply(inPoints[2:(nDim+1)], scalePoints,
                                      bestPoint=inPoints[[1]])
    }
  }
  return(outPoints)
}
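As a quick check, one step from the original starting simplex should replace the worst vertex, (0, 0), with a better point:

step1 <- simplexLogic(list(c(0, 0), c(1.2, 0), c(0.0, 0.8)), evalFunc)
sapply(step1, evalFunc)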
queryVals <- seq(-5, 5, 0.1)
nVal <- length(queryVals)
xVals <- rep(queryVals, nVal)
yVals <- rep(queryVals, each=nVal)
zVals <- sapply(seq(1, length(xVals)), function(i){evalFunc(c(xVals[i], yVals[i]))})

plotX <- queryVals
plotY <- queryVals
plotZ <- matrix(zVals, nrow=nVal, ncol=nVal, byrow=FALSE)
contour(plotX, plotY, plotZ, nlevels=80)
#points(3, 2, col="red")
rownames(plotZ) <- plotX
colnames(plotZ) <- plotY
plotZ["3", "2"]  # value at the analytic minimum
initPoint <- usePoints
lapply(usePoints, function(x){points(x[1], x[2], col="green")})

iter <- 1
outPoints <- vector("list", 20)
while (iter < 20){
  outPoints[[iter]] <- initPoint
  initPoint <- simplexLogic(initPoint, evalFunc)
  iter <- iter + 1
}

plotPoints <- function(inPoints, useCol="green"){
  lapply(inPoints, function(x){
    points(x[1], x[2], col=useCol)
  })
}
plotPoints(outPoints[[1]])
plotPoints(outPoints[[2]])
plotPoints(outPoints[[3]], "orange")
plotPoints(outPoints[[4]], "red")  # this didn't do the expected reflection
plotPoints(outPoints[[5]], "blue")
plotPoints(outPoints[[6]], "green")
plotPoints(outPoints[[7]], "orange")
plotPoints(outPoints[[8]], "blue")
plotPoints(outPoints[[9]], "black")
plotPoints(outPoints[[10]], "red")