#' @title Moving Particles
#'
#' @description This function runs the Moving Particles algorithm for estimating extreme probability
#' and quantile.
#'
#' @author Clement WALTER \email{clementwalter@icloud.com}
#'
#' @aliases LPA
#'
#' @details \code{MP} is a wrap up of \code{\link{IRW}} for probability and quantile estimation.
#' By construction, the several calls to \code{\link{IRW}} are parallel (\pkg{foreach})
#' and so is the algorithm. Especially, with \code{N.batch=1}, this is the Last Particle
#' Algorithm, which is a specific version of \code{\link{SubsetSimulation}} with \code{p_0 = 1-1/N}.
#' However, note that this algorithm not only gives a quantile or a probability estimate
#' but also an estimate of the whole cdf until the given threshold \code{q}.
#'
#' The probability estimator only requires to generate several random walks as it is the estimation
#' of the parameter of a Poisson random variable. The quantile estimator is a little bit more complicated
#' and requires a 2-passes algorithm. It is thus not exactly fully parallel as cluster/cores have to
#' communicate after the first pass. During the first pass, particles are moved a given number of
#' times, during the second pass particles are moved until the farthest event reach during the first
#' pass. Hence, the random process is completely simulated until this given state.
#'
#' For an easy user experiment, all the parameters are defined by default with the optimised values
#' as described in the reference paper (see References below) and a typical use will only specify
#' \code{N} and \code{N.batch}.
#'
#' @note The \code{alpha} parameter is set to 0.05 by default. Indeed it should not be
#' set too small as it is defined approximating the Poisson distribution with the Gaussian one.
#' However if no estimate is produce then the algorithm can be restarted for the few missing events.
#' In any cases, setting \code{Niter_1fold = -N/N.batch*log(p)} gives 100\% chances to produces
#' a quantile estimator.
#'
#' @seealso
#' \code{\link{SubsetSimulation}}
#' \code{\link{MonteCarlo}}
#' \code{\link{IRW}}
#'
#' @references
#' \itemize{
#' \item A. Guyader, N. Hengartner and E. Matzner-Lober:\cr
#' \emph{Simulation and estimation of extreme quantiles and extreme
#' probabilities}\cr
#' Applied Mathematics \& Optimization, 64(2), 171-196.\cr
#' \item C. Walter:\cr
#' \emph{Moving Particles: a parallel optimal Multilevel Splitting
#' method with application in quantiles estimation and meta-model
#' based algorithms}\cr
#' Structural Safety, 55, 10-25.\cr
#' \item E. Simonnet:\cr
#' \emph{Combinatorial analysis of the adaptive last particle method}\cr
#' Statistics and Computing, 1-20.\cr
#' }
#'
#' @examples
#' \dontrun{
#' # Estimate some probability and quantile with the parabolic lsf
#' p.est <- MP(2, kiureghian, N = 100, q = 0) # estimate P(lsf(X) < 0)
#' p.est <- MP(2, kiureghian, N = 100, q = 7.8, lower.tail = FALSE) # estimate P(lsf(X) > 7.8)
#'
#' q.est <- MP(2, kiureghian, N = 100, p = 1e-3) # estimate q such that P(lsf(X) < q) = 1e-3
#' q.est <- MP(2, kiureghian, N = 100, p = 1e-3, lower.tail = FALSE) # estimate q such
#' # that P(lsf(X) > q) = 1e-3
#'
#'
#' # plot the empirical cdf
#' plot(xplot <- seq(-3, p.est$L_max, l = 100), sapply(xplot, p.est$ecdf_MP))
#'
#' # check validity range
#' p.est$ecdf_MP(p.est$L_max - 1)
#' # this example will fail because the quantile is greater than the limit
#' tryCatch({
#' p.est$ecdf_MP(p.est$L_max + 0.1)},
#' error = function(cond) message(cond))
#'
#' # Run in parallel
#' library(doParallel)
#' registerDoParallel()
#' p.est <- MP(2, kiureghian, N = 100, q = 0, N.batch = getDoParWorkers())
#' }
#'
#' @import ggplot2
#' @import foreach
#' @import iterators
#' @import doParallel
#' @export
MP = function(dimension,
#' @param dimension the dimension of the input space.
lsf,
#' @param lsf the function defining the RV of interest Y = lsf(X).
N = 100,
#' @param N the total number of particles,
N.batch = foreach::getDoParWorkers(),
#' @param N.batch the number of parallel batches for the algorithm. Each batch will then
#' have \code{N/N.batch} particles. Typically this could be \code{detectCores()} or some
#' other machine-derived parameters. Note that \code{N/N.batch} has to be an integer.
p,
#' @param p a given probability to estimate the corresponding quantile (as in qxxxx functions).
q,
#' @param q a given quantile to estimate the corresponding probability (as in pxxxx functions).
lower.tail = TRUE,
#' @param lower.tail as for pxxxx functions, TRUE for estimating P(lsf(X) < q), FALSE
#' for P(lsf(X) > q).
Niter_1fold,
#' @param Niter_1fold a function = fun(N) giving the deterministic number of iterations
#' for the first pass.
alpha = 0.05,
#' @param alpha when using default \code{Niter_1fold} function, this is the risk not to have
#' simulated enough samples to produce a quantile estimator.
compute_confidence = FALSE,
#' @param compute_confidence if \code{TRUE}, the algorithm runs a little bit longer to produces
#' a 95\% interval on the quantile estimator.
verbose = 0,
#' @param verbose to control level of print (either 0, or 1, or 2).
chi2 = FALSE,
#' @param chi2 for a chi2 test on the number of events.
breaks = N.batch/5,
#' @param breaks for the final histogram is \code{chi2 == TRUE}.
...) {
#' @param ... further arguments past to \code{\link{IRW}}.
cat('========================================================================\n')
cat(' Beginning of MP algorithm\n')
cat('========================================================================\n')
## STEP 0 : INITIALISATION
# Fix NOTE issue with R CMD check
k <- NULL
# Define transpose for list of lists with same fields, eg output of foreach
t.list <- function(l){
lapply(split(do.call("c", l), names(l[[1]])), unname)
}
arg <- list(...)
arg$dimension <- dimension
arg$lsf <- lsf
arg$N <- N/N.batch
if(lower.tail==TRUE){
lsf_dec = arg$lsf
arg$lsf = function(x) -1*lsf_dec(x)
}
estim.proba <- missing(p)
estim.quantile <- !estim.proba
if(estim.quantile){ # quantile estimation
if(missing(Niter_1fold)){
Niter_1fold = function(N,n = N.batch) {
t = -log(p)
t = t + 1.96*sqrt(t/n/N)*compute_confidence
if(n>1){
bn = sqrt(2*log(n)) - 0.5*( log(log(n)) + log(4*pi) )/sqrt(2*log(n))
beta = bn - log(log(1/alpha))/sqrt(2*log(n))
delta = beta^2 + 4*N*t
}
else{
bn = 0
beta = 0
delta = 0
}
res = ceiling(0.5*(beta^2 + 2*N*t - beta*sqrt(delta) ))
return(res)
}
}
moves = c()
cat(" =============================================== \n")
cat(" STEP 1 : MOVE PARTICLES A GIVEN NUMBER OF TIMES \n")
cat(" =============================================== \n\n")
cat(" * Number of deterministic event per algorithm =",Niter_1fold(N),"\n\n")
cat(" ### PARALLEL PART ###\n")
cat(" * backend:", ifelse(is.null(foreach::getDoParName()), "no registered backend", foreach::getDoParName()), "\n")
cat(" * N.batch =", N.batch, "\n\n")
mp.1 <- foreach(k=icount(N.batch)) %dopar% {
arg$Nevent = Niter_1fold(N)
if(verbose<2){
capture.output(mp <- do.call(IRW,arg))
} else {
mp <- do.call(IRW, arg)
}
mp[c('L', 'Ncall', 'X', 'y')]
}
t.mp.1 <- t(mp.1)
Ncall <- unlist(t.mp.1$Ncall)
L <- unlist(t.mp.1$L)
L_max <- max(L)
# cat("\n ### END OF PARALLEL PART; furthest point =", (-1)^lower.tail*L_max,"\n\n")
cat(" ===================================================== \n")
cat(" STEP 2 : RESTART ALGORITHM UNTIL",(-1)^lower.tail*L_max," \n")
cat(" ===================================================== \n\n")
cat(" ### PARALLEL PART ###\n")
cat(" * backend:", foreach::getDoParName(), "\n")
cat(" * N.batch =", N.batch, "\n\n")
mp.2 <- foreach(i=iter(mp.1)) %dopar% {
arg = c(list(
X = i$X,
y = i$y,
q = L_max,
last.return = FALSE),
arg)
if(verbose<2){
capture.output(mp <- do.call(IRW,arg))
} else {
mp <- do.call(IRW,arg)
}
tryCatch(
if(max(mp$L) > L_max){stop("Last time of algorithms should not be greater than furthest time of the first step \n")},
warning = function(cond){}
)
mp$L <- mp$L[-1] # First time equals last one from first pass
mp[c('L', 'Ncall', 'X', 'y')]
}
t.mp.2 <- t(mp.2)
moves <- sapply(t.mp.2$L, length) + Niter_1fold(N)
X = do.call(cbind, t.mp.2$X)
y = unlist(t.mp.2$y)
Ncall <- Ncall + unlist(t.mp.2$Ncall)
L = sort(c(L, unlist(t.mp.2$L)))
# cat("\n ### END OF PARALLEL PART \n\n")
}
else{ # probability estimation
arg$q <- (-1)^lower.tail*q
# cat(" ===================================== \n")
# cat(" STEP 1 : MOVE PARTICLES UNTIL FAILURE \n")
# cat(" ===================================== \n\n")
#
cat(" ### PARALLEL PART ###\n")
cat(" * backend:", foreach::getDoParName(), "\n")
cat(" * N.batch =", N.batch, "\n\n")
mp <- foreach(k=icount(N.batch)) %dopar% {
#cat(" * TRAIN N0",k,"in",N.batch,"\n")
if(verbose<2){capture.output(mp <- do.call(IRW, arg))}
else{mp <- do.call(IRW, arg)}
mp[c('Ncall', 'L', 'X', 'y')]
}
t.mp <- t(mp)
Ncall <- L <- X <- y <- NULL
lapply(mp, function(l) {
Ncall <<- c(Ncall, l$Ncall)
L <<- c(L, l$L)
X <<- cbind(X, l$X)
y <<- c(y, l$y)
})
moves <- sapply(t.mp$L, length)
# cat(" ### END OF PARALLEL PART \n\n")
}
cat("========================================================================\n")
cat(" End of MP algorithm \n")
cat("========================================================================\n\n")
Ntot = length(y)
if(estim.proba){
p = (1 - 1/Ntot)^sum(L<(q*(-1)^lower.tail))
p_int = c( p*exp(-1.96*sqrt(-log(p)/Ntot)) , p*exp(+1.96*sqrt(-log(p)/Ntot)) )
cov = sqrt(-log(p)/Ntot)
L_max <- q
q_int <- c(q, q)
}
else{
m = ceiling(-Ntot*log(p))
q = 0.5*(L[m-1] + L[m])*(-1)^lower.tail
q_int = c( L[floor(m-1.96*sqrt(m))] , L[ceiling(m+1.96*sqrt(m))] )*(-1)^lower.tail
cov = NA
p_int <- c(p, p)
L_max <- (-1)^lower.tail*L_max
}
L <- (-1)^lower.tail*L
ecdf_MP <- local({
lower.tail <- lower.tail
L_max <- L_max
L <- L;
Ntot <- Ntot
function(q) {
sapply(q, function(q) {
if(lower.tail==TRUE){
Nevent <- sum(L>q)
if(q<L_max) {
message(paste('Empirical cdf valid only for q >=', L_max))
Nevent <- NA
}
}
else{
Nevent <- sum(L<q)
if(q>L_max) {
message(paste('Empirical cdf valid only for q =<', L_max))
Nevent <- NA
}
}
(1-1/Ntot)^Nevent
})
}
})
if(chi2 == TRUE) {
res = hist(moves, breaks = breaks, freq = FALSE)
cont = res$counts; l = length(cont)
chi2_p = 1:l;
br = res$breaks
br[1] = -Inf; br[length(br)] = Inf;
for(i in 1:l){
chi2_p[i] = ppois(br[i+1],mean(moves)) - ppois(br[i], mean(moves))
}
capture.output(chi2.test <- chisq.test(x = cont, p = chi2_p))
p.val = 1 - pchisq(chi2.test$statistic, df = (l-2))
}
cat(" - p =",p,"\n")
cat(" - q =",q,"\n")
if(estim.proba){
cat(" - 95% confidence intervalle :",p_int[1],"< p <",p_int[2],"\n")
}
else{
if(lower.tail==FALSE){
cat(" - 95% confidence intervalle :",q_int[1],"< q <",q_int[2],"\n")
}
else{
cat(" - 95% confidence intervalle :",q_int[2],"< q <",q_int[1],"\n")
}
cat(" - Maximum number of moves/algorithm =",max(moves),"\n")
cat(" - L_max =",L_max,"\n")
cat(" - Number of moves =",length(L),"\n")
cat(" - Targeted number of moves =",ceiling(m+compute_confidence*1.96*sqrt(m)),"\n")
}
cat(" - Total number of calls =",sum(Ncall),"\n")
if(chi2 == TRUE) {cat(" - Chi-2 test =", chi2.test$statistic,"; p-value =", p.val,"\n")}
#' @return An object of class \code{list} containing the outputs described below:
res = list(p = p,
#' \item{p}{the estimated probability or the reference for the quantile estimate.}
q = q,
#' \item{q}{the estimated quantile or the reference for the probability estimate.}
cv = ifelse(estim.proba, sqrt(sum(moves))/Ntot, 0),
#' \item{cv}{the coefficient of variation of the probability estimator.}
ecdf = ecdf_MP,
#' \item{ecdf}{the empirical cdf.}
L = L,
#' \item{L}{the states of the random walk.}
L_max = L_max,
#' \item{L_max}{the farthest state reached by the random process. Validity range
#' for the \code{ecdf} is then (-Inf, L_max] or [L_max, Inf).}
times = L,
#' \item{times}{the \emph{times} of the random process.}
Ncall = Ncall,
#' \item{Ncall}{the total number of calls to the \code{lsf}.}
X = X,
#' \item{X}{the \code{N} particles in their final state.}
y = y,
#' \item{y}{the value of the \code{lsf(X)}.}
moves = moves)
#' \item{moves}{a vector containing the number of moves for each batch.}
if(estim.proba){
res <- c(res, list(
p_int = p_int,
#' \item{p_int}{a 95\% confidence intervalle on the probability estimate.}
cov = cov
#' \item{cov}{the coefficient of variation of the estimator}
))
}
else{
res <- c(res, list(
q_int = q_int
#' \item{q_int}{a 95\% confidence intervall on the quantile estimate.}
))
}
if(chi2 == TRUE) {res = c(res, list(chi2 = chi2.test))}
#' \item{chi2}{the output of the chisq.test function.}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.