Nothing
#' Searches for better starting values.
#'
#' Given a set of starting values and a range for them, searches for points with a better likelihood and steeper gradients.
#'
#' This function implements a simplified version of the algorithm proposed by Bierlaire, M., Themans, M. & Zufferey, N. (2010), A Heuristic for Nonlinear Global Optimization, INFORMS Journal on Computing, 22(1), pp.59-70. The main difference
#' lies in it implementing only two out of three tests on the candidates described by the authors. The implemented algorithm has the
#' following steps.
#' \enumerate{
#' \item Randomly draw \code{nCandidates} candidates from an interval given by the user.
#' \item Label all candidates with a valid log-likelihood (LL) as active.
#' \item Apply \code{bfgsIter} iterations of the BFGS algorithm to each active candidate.
#' \item Apply the following tests to each active candidate:
#' \enumerate{
#' \item Has the BGFS search converged?
#' \item Are the candidate parameters after BFGS closer than \code{dTest} from any other candidate with higher LL?
#' \item Is the LL of the candidate after BFGS further than \code{distLL} from a candidate with better LL, and its gradient smaller than \code{gTest}?
#' }
#' \item Mark any candidates for which at least one test results in yes as inactive.
#' \item Go back to step 3, unless only one candidate is active, or the maximum number of iterations (\code{maxStages}) has been reached.
#' }
#' This function will write a CSV file to the working/output directory summarising progress. This file is called \code{modelName}_searchStart.csv .
#' @param apollo_beta Named numeric vector. Names and values for parameters.
#' @param apollo_fixed Character vector. Names (as defined in \code{apollo_beta}) of parameters whose value should not change during estimation.
#' @param apollo_probabilities Function. Returns probabilities of the model to be estimated. Must receive three arguments:
#' \itemize{
#' \item \strong{\code{apollo_beta}}: Named numeric vector. Names and values of model parameters.
#' \item \strong{\code{apollo_inputs}}: List containing options of the model. See \link{apollo_validateInputs}.
#' \item \strong{\code{functionality}}: Character. Can be either \strong{\code{"components"}}, \strong{\code{"conditionals"}}, \strong{\code{"estimate"}} (default), \strong{\code{"gradient"}}, \strong{\code{"output"}}, \strong{\code{"prediction"}}, \strong{\code{"preprocess"}}, \strong{\code{"raw"}}, \strong{\code{"report"}}, \strong{\code{"shares_LL"}}, \strong{\code{"validate"}} or \strong{\code{"zero_LL"}}.
#' }
#' @param apollo_inputs List grouping most common inputs. Created by function \link{apollo_validateInputs}.
#' @param searchStart_settings List. Contains settings for this function. User input is required for all settings except those with a default or marked as optional.
#' \itemize{
#' \item \strong{\code{apolloBetaMax}}: Vector. Maximum possible value of parameters when generating candidates. Ignored if smartStart is TRUE. Default is \code{apollo_beta + 0.1}.
#' \item \strong{\code{apolloBetaMin}}: Vector. Minimum possible value of parameters when generating candidates. Ignored if smartStart is TRUE. Default is \code{apollo_beta - 0.1}.
#' \item \strong{\code{bfgsIter}}: Numeric scalar. Number od BFGS iterations to perform at each stage to each remaining candidate. Default is 20.
#' \item \strong{\code{dTest}}: Numeric scalar. Tolerance for test 1. A candidate is discarded if its distance in parameter space to a better one is smaller than \code{dTest}. Default is 1.
#' \item \strong{\code{gTest}}: Numeric scalar. Tolerance for test 2. A candidate is discarded if the norm of its gradient is smaller than \code{gTest} AND its LL is further than \code{llTest} from a better candidate. Default is \code{10^(-3)}.
#' \item \strong{\code{llTest}}: Numeric scalar. Tolerance for test 2. A candidate is discarded if the norm of its gradient is smaller than \code{gTest} AND its LL is further than \code{llTest} from a better candidate. Default is 3.
#' \item \strong{\code{maxStages}}: Numeric scalar. Maximum number of search stages. The algorithm will stop when there is only one candidate left, or if it reaches this number of stages. Default is 5.
#' \item \strong{\code{nCandidates}}: Numeric scalar. Number of candidate sets of parameters to be used at the start. Should be an integer bigger than 1. Default is 100.
#' \item \strong{\code{smartStart}}: Boolean. If TRUE, candidates are randomly generated with more chances in the directions the Hessian indicates improvement of the LL function. Default is FALSE.
#' }
#' @return named vector of model parameters. These are the best values found.
#' @export
#' @importFrom numDeriv hessian
#' @importFrom maxLik maxLik
apollo_searchStart <- function(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs, searchStart_settings=NA){
# # # # # # # # # # #
#### Initialise ####
# # # # # # # # # # #
### Load defaults
default <- list(nCandidates=100, apolloBetaMin=apollo_beta - 0.1, apolloBetaMax=apollo_beta + 0.1,
smartStart=FALSE, maxStages=5, dTest=1, gTest=10^-3, llTest=3, bfgsIter=20)
if(length(searchStart_settings)==1 && is.na(searchStart_settings)) searchStart_settings <- default
tmp <- names(default)[!(names(default) %in% names(searchStart_settings))] # options missing in searchStart_settings
for(i in tmp) searchStart_settings[[i]] <- default[[i]]
### Extract setings
nCandidates = searchStart_settings[["nCandidates"]]
apolloBetaMin = searchStart_settings[["apolloBetaMin"]]
apolloBetaMax = searchStart_settings[["apolloBetaMax"]]
smartStart = searchStart_settings[["smartStart"]]
maxStages = searchStart_settings[["maxStages"]]
dTest = searchStart_settings[["dTest"]]
gTest = searchStart_settings[["gTest"]]
llTest = searchStart_settings[["llTest"]]
bfgsIter = searchStart_settings[["bfgsIter"]]
if(!is.null(apollo_inputs$silent)) silent <- apollo_inputs$silent else silent <- FALSE
if(!is.null(apollo_inputs$apollo_control$seed)) seed <- apollo_inputs$apollo_control$seed + 4 else seed <- 13 + 4
### Checks
if(nCandidates<2) stop("SYNTAX ISSUE - Argument 'nCandidates' should be at least 2.")
if(maxStages<1) stop("SYNTAX ISSUE - Argument 'maxStages' should be at least 1.")
if(anyNA(c(apolloBetaMin,apolloBetaMax)) & !smartStart) stop("SYNTAX ISSUE - Invalid 'apolloBetaMin' and/or 'apolloBetaMax' parameters.")
apollo_print("Testing probability function (apollo_probabilities)")
apollo_inputs$apollo_control$noDiagnostics <- TRUE
apollo_probabilitiesVal <- apollo_insertComponentName(apollo_probabilities)
apollo_probabilitiesVal(apollo_beta, apollo_inputs, functionality="validate")
rm(apollo_probabilitiesVal)
### Pre-process likelihood function
if(!silent) apollo_print("Pre-processing likelihood function...")
# Create multi-core version of likelihood (if needed)
apollo_logLike <- apollo_makeLogLike(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
list(estimationRoutine='BHHH'))
on.exit(if(exists('apollo_logLike') && apollo_inputs$apollo_control$nCores>1) parallel::stopCluster(environment(apollo_logLike)$cl))
# Create gradient function if required (and possible)
if(!is.null(apollo_inputs$apollo_control$analyticGrad) && apollo_inputs$apollo_control$analyticGrad){
# apollo_makeGrad will create gradient function ONLY IF all components have analytical gradient.
grad <- apollo_makeGrad(apollo_beta, apollo_fixed, apollo_logLike, validateGrad=TRUE)
if(is.null(grad)) apollo_inputs$apollo_control$analyticGrad <- FALSE
} else grad <- NULL
### Separate beta into variable and fixed part
beta_var_val <- apollo_beta[!(names(apollo_beta) %in% apollo_fixed)]
beta_fix_val <- apollo_beta[apollo_fixed]
K <- length(beta_var_val)
# # # # # # # # # # # # # # # # #
#### Generate candidate set ####
# # # # # # # # # # # # # # # # #
### Only non-fixed parameters are considered.
# Candidates AT THE START OF EACH ITERATION are stored in a list of matrices called 'candidates'.
# Each element of the list is a matrix where each row is a candidate
set.seed(seed)
candidates <- list()
apollo_print(paste("Creating initial set of",nCandidates,"candidate values."))
if(smartStart){
# Create neighbours of starting value using Hessian (Bierlaire et al. 2007)
apollo_print(" (using Hessian, this might take a while).")
if(!is.null(grad)){
sumGradLL <- function(theta) colSums( grad(theta) )
H <- numDeriv::jacobian(func=sumGradLL, x=beta_var_val)
} else H <- numDeriv::hessian(apollo_logLike, beta_var_val, sumLL=TRUE)
eig <- eigen(H)
w <- apply(eig$vectors, MARGIN=2, function(wi) wi/sqrt(sum(wi^2)))
w <- cbind(w,-w)
p <- rep(exp(0.5*eig$values),2)
p <- p/sum(p)
w <- w[, sample(ncol(w), nCandidates-1, replace=TRUE, prob=p)]
w <- split(w, rep(1:ncol(w), each = nrow(w)))
a <- as.list(stats::runif(length(w), min=0.75, max=1))
z <- beta_var_val + mapply(function(wi,ai) wi*ai , w, a)
z <- cbind(beta_var_val,z) # matrix where each column is a candidate
candidates[[1]] <- t(z)
rm(H, eig, w, p, a, z)
} else{
# Create random starting values using apolloBetaMin & apolloBetaMax.
apolloBetaMin <- apolloBetaMin[names(beta_var_val)]
apolloBetaMax <- apolloBetaMax[names(beta_var_val)]
candidates[[1]] <- t(apollo_mlhs(nCandidates-1,K,1)) #matrix(stats::runif((nCandidates-1)*K), nrow=K, ncol=nCandidates-1)
candidates[[1]] <- apolloBetaMin + (apolloBetaMax-apolloBetaMin)*candidates[[1]]
candidates[[1]] <- cbind(beta_var_val, candidates[[1]])
candidates[[1]] <- t(candidates[[1]])
}
### Calculate LL of all candidates AT THE START OF THE ITERATION
LL <- rep(0, nCandidates)
cat("Calculating LL of candidates 0%")
for(j in 1:nCandidates){
beta_test <- as.vector(candidates[[1]][j,])
names(beta_test) <- names(beta_var_val)
LL[j] <- apollo_logLike(beta_test, sumLL=TRUE)
if(j==ceiling(nCandidates/2)) cat("50%") else if(j%%ceiling(nCandidates/10)==0) cat(".")
}
cat("100%\n")
### Remove candidates with infinite or NA loglike
if(any(!is.finite(LL))){
removeRows <- which(!is.finite(LL))
candidates[[1]] <- candidates[[1]][-removeRows,]
LL <- LL[-removeRows]
nCandidates <- nCandidates - length(removeRows)
apollo_print(paste0(length(removeRows), " candidates removed due to non-finite starting LL. ", nCandidates, " remain."))
if(nCandidates<1) stop("CALCULATION ISSUE - All initial candidates had non-finite starting LL.")
if(nCandidates==1) return(candidates[[1]])
}
### Write candidates
fileName <- paste0(apollo_inputs$apollo_control$modelName,"_searchStart.csv")
if(dir.exists(apollo_inputs$apollo_control$outputDirectory)){
lastChar <- nchar(apollo_inputs$apollo_control$outputDirectory)
lastChar <- substr(apollo_inputs$apollo_control$outputDirectory, start=lastChar, stop=lastChar)
if(lastChar=="/") fileName <- paste0(apollo_inputs$apollo_control$outputDirectory, fileName)
if(lastChar!="/") fileName <- paste0(apollo_inputs$apollo_control$outputDirectory, "/", fileName)
rm(lastChar)
}
utils::write.csv(cbind(candidates[[1]], LL=LL, stage=1), fileName, row.names=FALSE)
# # # # # # # # # # # # # # # #
#### Loop over candidates ####
# # # # # # # # # # # # # # # #
active <- rep(TRUE, nCandidates)
converged <- rep(FALSE, nCandidates)
s <- 1
while(sum(active)>1 & s<=maxStages){
activeRows <- which(active)
apollo_print('\n')
apollo_print(paste0("Stage ", s, ", ", length(activeRows), " active candidates."))
apollo_print(paste("Estimating", bfgsIter, "BFGS iteration(s) for each active candidate."))
cat(" Candidate......LLstart.....LLfinish.....GradientNorm...Converged")
# Apply BFGS for each active candidate
LL <- cbind(LL, NA)
colnames(LL) <- paste0("LL", 1:ncol(LL))
candidates[[s+1]]<- matrix(NA, nrow=nCandidates, ncol=K)
gradientNorm <- rep(NA, nCandidates)
for(j in activeRows){
LLstart <- as.character(round(LL[j,s],0))
cat("\n ",rep(" ",9-nchar(as.character(j))), j,
" ", rep(" ",12-nchar(LLstart)),LLstart, sep="")
if(!converged[j]){ # If it hasn't converged yet
candidateParam <- as.vector(candidates[[s]][j,])
names(candidateParam) <- names(beta_var_val)
model <- maxLik::maxLik(apollo_logLike, start=candidateParam,
method="bfgs", print.level=0, grad=grad,
finalHessian=FALSE, iterlim=bfgsIter)
candidates[[s+1]][j,] <- model$estimate
LL[j,s+1] <- model$maximum
gradientNorm[j] <- sqrt(sum(model$gradient^2))
converged[j] <- ifelse(model$code==0, TRUE, FALSE)
} else {# if it has converged already
candidates[[s+1]][j,] <- candidates[[s]][j,]
LL[j,s+1] <- LL[j,s]
gradientNorm[j] <- 0 # approximation
converged[j] <- TRUE
}
LLfinish <- as.character(round(LL[j,s+1],0))
gradFin <- as.character(round(gradientNorm[j],3))
cat(" ", rep(" ",12-nchar(LLfinish)), LLfinish,
" ", rep(" ",16-nchar(gradFin)), gradFin,
" ", rep(" ",10), 1*converged[j], sep="")
}; cat('\n')
rm(LLstart, candidateParam, model, LLfinish, gradFin)
# Update active list
for(j in activeRows){
candParam <- as.vector(candidates[[s+1]][j,])
candLL <- LL[j,s+1]
betterLLRows <- activeRows[LL[activeRows,s+1]>=candLL & activeRows!=j]
# Test 0: Converged to a worst solution
if(converged[j] & length(betterLLRows)>0) active[j] <- FALSE
# Apply tests only if it hasn't converged and is not the best LL of the stage
if(length(betterLLRows)>0 & !converged[j]){
# Test 1: Distance in parameter space to a better one
betterParams <- candidates[[s+1]][betterLLRows,]
if(length(betterLLRows)==1) betterParams <- matrix(betterParams, nrow=1)
distParam <- apply(betterParams, MARGIN=1, function(x) sqrt(sum((x-candParam)^2)) )
failedT1Rows <- betterLLRows[ distParam<dTest ]
# Test 2: small gradient norm and close to another
distLL <- abs(as.vector(LL[betterLLRows,s+1] - candLL))
failedT2Rows <- betterLLRows[ gradientNorm[betterLLRows]<gTest & distLL>=llTest ]
if(length(failedT1Rows)>0 | length(failedT2Rows)>0){
active[j] <- FALSE
apollo_print(paste0("Candidate ", j, " dropped."))
if(length(failedT1Rows)>0) apollo_print(paste0("- Failed test 1: Too close to ",
paste0(failedT1Rows, collapse=', '),
" in parameter space."))
if(length(failedT2Rows)>0) apollo_print(paste0("- Failed test 2: Converging to a worse solution than",
paste0(failedT2Rows, collapse=', ')))
}
rm(betterParams, distParam, failedT1Rows, failedT2Rows)
}
rm(candParam, candLL, betterLLRows)
}
# Print best candidate to screen
bestCandRow <- which.max(LL[,s+1])
bestCandParam <- as.vector(candidates[[s+1]][bestCandRow,])
names(bestCandParam) <- names(beta_var_val)
bestCandParam <- c(bestCandParam, apollo_beta[apollo_fixed])
bestCandParam <- bestCandParam[names(apollo_beta)]
bestCandParam <- matrix(bestCandParam, ncol=1, dimnames = list(names(apollo_beta), 'Value'))
apollo_print(paste0("Best candidate so far (LL=", round(LL[bestCandRow,s+1],1), ")"))
print(as.matrix(round(bestCandParam,4)))
# Write current state to file
tryCatch(utils::write.csv(cbind(candidates[[1]], LL), fileName, row.names=FALSE),
error=function(e) apollo_print(paste0("Stage update ",s," could not be written to file", fileName)))
# Next iteration
s <- s+1
}
tmp <- as.vector(bestCandParam)
names(tmp) <- rownames(bestCandParam)
invisible(return(tmp))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.