Nothing
# Copyright (C) 2018 Markus Baaske. All Rights Reserved.
# This code is published under the GPL (>=3).
#
# File: simQLData.R
# Date: 14/03/2018
# Author: Markus Baaske
#
# Functions to collect the simulation results
# internal, default: create and use a local cluster object of size one
#' @importFrom digest digest
doInParallel <- function(X, FUN, ... , cl = NULL, iseed = NULL,
cores = getOption("mc.cores",1L), # force sequential processing if cores=1L
cache = getOption("qle.cache",FALSE),
fun = getOption("qle.multicore","lapply"))
{
SIM <- if(cache) {
for(i in 1:length(X))
attr(X[[i]],"id") <- i
tryCatch({
function(x,...) {
dg <- digest::digest(list(FUN, x, attr(x,"id")))
cache.fn <- sprintf("cached_%s.rda",dg)
if (file.exists(cache.fn)){
load(cache.fn)
} else {
var <- FUN(x,...)
save(var, file = cache.fn)
}
return(var)
}
}, error = function(e) {
structure(
list(message=.makeMessage("Failed to load/save/execute cached results.",
conditionMessage(e)),
call=sys.call()),
class=c("error","condition"), error=e)
})
} else FUN
noCluster <- is.null(cl)
if(!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
tryCatch({
if(cores > 1L || !noCluster) {
if(noCluster && fun == "mclapply" ){
# Only for L'Ecuyer-CMRG we get reproducible results
if(!is.null(iseed) && RNGkind()[1L] == "L'Ecuyer-CMRG")
set.seed(iseed)
if(.Platform$OS.type != "windows")
parallel::mclapply(X, SIM, ..., mc.cores = cores)
else {
options(mc.cores=1L)
lapply(X, SIM, ...)
}
} else {
if(noCluster){
## only a 'local' cluster is supported here
type <- if(Sys.info()["sysname"] == "Linux")
"FORK" else "PSOCK"
cl <- parallel::makeCluster(cores,type=type)
}
if(any(class(cl) %in% c("MPIcluster","SOCKcluster","cluster"))){
# Only for L'Ecuyer-CMRG we get reproducible results for a cluster
if(!is.null(iseed) && RNGkind()[1L] == "L'Ecuyer-CMRG")
parallel::clusterSetRNGStream(cl,iseed)
parallel::parLapply(cl, X = X, fun = SIM, ...)
} else {
stop(paste0("Failed to initialize cluster: unsupported cluster class: ",class(cl)))
}
}
} else {
noCluster <- FALSE
lapply(X, SIM, ...)
}
},error = function(e) {
return(
structure(
list(message=.makeMessage("Calling `",deparse(substitute(SIM)), "` failed: ",conditionMessage(e)),
call=sys.call()),
class = c("error", "condition"), error = e))
},finally = {
if(noCluster && !is.null(cl)){
if(inherits(try(parallel::stopCluster(cl),silent=TRUE),"try-error"))
message(.makeMessage("Failed to stop cluster object."))
cl <- NULL
invisible(gc)
}
})
}
#' @name simQLdata
#'
#' @title Simulate the statistical model
#'
#' @description The function runs simulations of the user-defined statistical model
#' either for a single parameter or a list of parameters.
#'
#' @param sim user supplied simulation function
#' @param X list or matrix of model parameters
#' @param ... arguments passed to `\code{sim}`
#' @param nsim number of simulation replications at each parameter
#' @param mode type of return value: "\code{list}", "\code{matrix}", "\code{mean}"
#' @param cl cluster object, \code{NULL} (default), of class \code{MPIcluster}, \code{SOCKcluster}, \code{cluster}
#' @param iseed integer seed for initializing the cluster workers
#' @param na.rm whether to remove \code{NA} values from simulation results
#' @param verbose if \code{TRUE}, then print intermediate output
#'
#' @return List of (aggregated) simulation results of class \code{simQL} and the following attributes:
#' \item{X}{ matrix of sample points (equal to `\code{X}` if supplied) or the result of \code{\link{multiDimLHS}}}
#' \item{nsim}{ number of simulation runs at each parameter}
#' \item{iseed}{ an integer seed value to initialize cluster workers}
#' \item{error}{ only in case of errors detected in simulation function}
#'
#' @details Basically, the given simulation function `\code{sim}` is called for each parameter value
#' `\code{nsim}` times and the results are returned depending on the chosen type `\code{mode}`.
#'
#' If `\code{X}` is \code{NULL} (default), then the design, that is, the matrix of sample points, is first generated
#' by \code{\link{multiDimLHS}} and the result is stored as an attribute named `\code{X}`. In this case, the arguments
#' `\code{N}`,`\code{bounds}` and `\code{method}` must be given as named input arguments as required by \code{\link{multiDimLHS}}.
#' In case of catched errors, the function tries to omit the corresponding simulation runs from the final results if possible.
#'
#' Depending on the complexity of the model simulations we strongly encourage the use of a cluster object `\code{cl}`. Make sure
#' to export all functions to the cluster environment beforehand which are required by the user-defined simulation function.
#' Also, using the option `\code{qle.cache}` caches results of a function call to be stored in the current working directory.
#' The filename is generated by a hash code using the \code{\link[digest]{digest}} package. Setting an integer seed \code{iseed} before
#' stores each result in a separate file and makes the data reproducible while loading the data again.
#'
#' @author M. Baaske
#'
#' @examples
#' # generate design points, simulate and return the sample means
#' sim <- simQLdata(sim=function(x) rlnorm(1,x[1],x[2]),
#' nsim=10,N=8, method="maximinLHS",
#' lb=c(-1.5,0),ub=c(2,1), mode="mean")
#'
#' @importFrom stats na.omit
#' @rdname simQLdata
#' @export
simQLdata <-
function(sim, ..., nsim, X = NULL, mode = c("list","matrix","mean"), cl = NULL,
iseed = NULL, na.rm = getOption("na.rm",TRUE), verbose = FALSE)
{
args <- list(...)
if(is.null(X)) {
my.args <- c("N","method","lb","ub")
id <- which(is.na(pmatch(my.args,names(args))))
if(length(id)>0L)
stop(paste(c("Arguments of `multiDimLHS` ",my.args[id]," not found."),collapse=" "))
# generate random sample
X <- multiDimLHS(args$N,args$lb,args$ub,method=args$method)
args <- args[-pmatch(my.args,names(args))]
} else {
if(!is.list(X))
X <- .ROW2LIST(X)
}
# check on input values
simFun <-
if(!inherits(sim,"simObj")){
sim <- match.fun(sim)
.checkfun(sim,args,remove=TRUE)
structure(
function(x) {
try(do.call(sim,c(list(x),args)))
},
class=c("simObj","function") )
} else sim
# run simulations
stopifnot(!missing(nsim) || is.numeric(nsim))
arg.list <- list(X=lapply(rep(X,each=nsim),unlist),
FUN=simFun, cl=cl, iseed=iseed)
# simulate model
if(verbose)
cat("Simulating the model...\n")
res <- do.call(doInParallel, arg.list)
# check results from user function simulation
if(inherits(res,"error")) {
msg <- .makeMessage("Error in `simQLdata`: ",conditionMessage(res))
message(msg)
return(.qleError(message=msg,call=match.call(),error=res))
}
errId <- which(sapply(res,function(x) .isError(x)))
if(length(errId) > 0L) {
msg <- .makeMessage("Simulation error (first error): ", res[[errId[1]]])
message(msg)
return(.qleError(message=msg,call=match.call(),error=res))
}
np <- length(X)
res <-
if(np > 1)
split(res, cut(seq_along(res),np,labels=FALSE))
else list(res)
mode <- match.arg(mode)
RES <- lapply(res,
function(x) {
# remove errors in results
err <- sapply(x,.isError)
ok <- which(!err)
if(length(ok) == 0L)
return (x)
structure(
switch(mode,
"list" = {
x[ok]
},
"mean"= {
if(na.rm)
apply(na.omit(as.data.frame(do.call(rbind,x[ok]))),2,mean)
else apply(do.call(rbind,x[ok]),2,mean)
},
"matrix" = {
data.matrix(
if(na.rm)
na.omit(as.data.frame(do.call(rbind,x[ok])))
else do.call(rbind,x[ok]))
}
), error = if(sum(err) > 0L) {
id <- which(err)
message(.makeMessage("A total of ",length(id), " errors detected in user defined simulation function."))
structure(x[id],nErr = length(id))
} else NULL)
}
)
names(RES) <- NULL
nErr <- which(sapply(RES, function(x) !is.null(attr(x,"error"))))
structure(RES,
nsim=nsim,
X=.LIST2ROW(X),
iseed=iseed,
error=if(length(nErr) > 0L) nErr else NULL,
class="simQL", call=sys.call())
}
# Cholesky decomposition of covariance matrices
varCHOLdecomp <- function(matList) {
# n <- NROW(matList[[1]])
# lx <- n*(n+1)/2
if(!is.matrix(matList[[1]]))
stop("Elements of list must be matrices.\n", call.=TRUE)
doIt <- function(i,x) {
m <- try(chol(x[[i]],pivot = FALSE), silent=TRUE )
if(inherits(m,"try-error") || anyNA(m) )
return(.qleError(message="Cholseky decomposition failed: ",
call=match.call(),error=m))
as.vector(m[col(m)>=row(m)])
}
lapply(1:length(matList),doIt,x=matList)
}
.resampleCov <- function(T,num){
n <- nrow(T)
apply(
sapply(1:num,
function(i){
TS <- apply(T,2,sample,replace=TRUE)
stopifnot(NROW(TS)==n)
m <- var(TS,na.rm=TRUE)
m[col(m)>=row(m)]
}
),1,var)
}
#' @name setQLdata
#'
#' @title Setup of quasi-likelihood data for estimation
#'
#' @description Aggregate the data for quasi-likelihood estimation
#'
#' @param runs list or matrix of simulation results obtained from \code{\link{simQLdata}}
#' @param X list or matrix of (design) points, i.e. model parameters
#' @param var.type name of variance matrix approximation type: "\code{cholMean}" (default)
#' @param Nb number of bootstrap samples, \code{=0} (default, no bootstrap used), to be generated for kriging the variance matrix,
#' only if `\code{var.type}`=`\code{kriging}`
#' @param na.rm if \code{TRUE} (default), remove `NA` values from simulation results
#' @param verbose if \code{TRUE}, print intermediate output
#'
#' @return
#' An object of class \code{QLdata} as a data frame with columns:
#' \item{X}{ Model parameters (\code{n=1},...,\code{q}) }
#' \item{mean}{ Results of simulation runs (\code{m=1},...,\code{p}) }
#' \item{var}{ Simulation variances of statistics (\code{m=1},...,\code{p}) }
#' \item{L}{ if applicable, Cholesky decomposed terms of variance matrices of statistics (k=1,...,(m*(m+1)/2))}
#' \item{Lb}{ if applicable, bootstrap variances of covariances}
#' where `\code{p}` denotes the number of user defined statistics and `\code{q}` the problem dimension, that is,
#' the number of statistical model parameters to be estimated.
#'
#' The following items are stored as attributes:
#'
#' \item{type}{ see above}
#' \item{nsim}{ number of simulations spent at each (design) point}
#' \item{xdim}{ length of model parameter}
#' \item{nWarnings}{ Number of warnings during simulation runs}
#' \item{nErrors}{ Number of errors during simulation runs}
#' \item{nIgnored}{ List of parameters ignored due to errors}
#'
#' @details
#' The function aggregates all neccessary data for quasi-likelihood estimation storing the
#' sample points and the corresponding simulation results of the statistics. If `\code{X}` equals \code{NULL},
#' then the sample points are taken from the object `\code{runs}`.
#'
#' The most critical part is the decomposition of variance matrices for each sample point unless `\code{var.type}`
#' equals "\code{const}" in which case a constant variance matrix approximation is expected to be given by the user in function \code{\link{qle}}.
#' The Cholesky decompositions are used for an average approximation of the variance matrices of the statistics when calculating the
#' quasi-score vector or any type of function criterion. If these fail for any reason we try to ignore, if possible, the corresponding sample
#' points and exclude these from all subsequent computations. Unless a constant variance matrix estimate is used, the default is to approximate the
#' variance matrix at any model parameter by either a kriging approximation of the \emph{Cholesky} terms (kriging the variance matrix) or as an average
#' over all sampled variance matrices (variance matrix average approximation) also based on the decomposed Cholesky terms (see vignette).
#'
#' @examples
#' # simulate model statistics at LHS design
#' sim <- simQLdata(sim =
#' function(x,cond) {
#' X <- rlnorm(cond$n,x[1],x[2])
#' c("MED"=median(X),"MAD"=mad(X))
#' },
#' cond=list("n"=10),
#' nsim=10, N=10, method="maximinLHS",
#' lb=c("mu"=-1.5,"sd"=0), ub=c("mu"=2,"sd"=1))
#'
#' # setup the QL data model using defaults
#' qldata <- setQLdata(sim,verbose=TRUE)
#'
#' @author M. Baaske
#' @rdname setQLdata
#' @export
setQLdata <- function(runs, X = NULL, var.type="cholMean",
Nb = 0, na.rm = TRUE, verbose = FALSE) {
nErrors <- 0
nWarnings <- 0
nsim <- attr(runs,"nsim")
stopifnot(!is.null(nsim) || is.numeric(nsim))
if(class(runs) != "simQL" || !is.list(runs))
stop("Argument `runs` must be of class `simQL` and `list` type.")
if(is.null(X))
X <- attr(runs,"X")
if(!is.matrix(X))
X <- .LIST2ROW(X)
.extract <- function(dataT) {
if( !is.null(attr(dataT,"error")) || inherits(dataT,"error")) {
msg <- paste0("Value of statistics has errors: ","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=dataT))
}
if(is.list(dataT))
dataT <- do.call(rbind,dataT)
if(na.rm)
dataT <- dataT[!rowSums(is.na(dataT)) > 0L,,drop=FALSE]
V <- try( var(dataT, na.rm=na.rm),silent=TRUE)
if (is.na(V) || !is.numeric(V) || inherits(V,"try-error") ) {
msg <- paste0("Failed to get variance matrix of statistics.","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=V))
}
is.pos <- .isPosDef(V)
if(is.pos != 0L) {
warning("Covariance matrix not positive definite!")
}
vmat <- as.matrix(V)
ret <- list("V" = vmat,
"mstat" = colMeans(dataT),
"vars" = diag(vmat),
"is.pos" = is.pos)
if(Nb > 0 && var.type == "kriging"){
ret$Lb <- try(.resampleCov(dataT,Nb),silent=TRUE)
if(inherits(ret$Lb,"try-error"))
attr(ret,"error") <- TRUE
}
ret
}
res <- tryCatch(
lapply(runs,.extract),
error = function(e) e)
if(.isError(res)){
msg <- .makeMessage("Extracting values of statistics failed.")
message(msg)
return(.qleError(message=msg,call=sys.call(),error=res))
}
ok <- which(
sapply(res,
function(x){
if(.isError(x)) {
ok <- FALSE # try to ignore this location
nErrors <<- nErrors+1
} else {
ok <- (x$is.pos == 0L)
if(!ok)
nWarnings <<- nWarnings+1
}
ok
}))
notOk <- integer(0)
msg <- "Normal completion"
nIgnored <-
if(length(ok) < length(res)) {
notOk <- which(!(1:length(res) %in% ok))
X[notOk,,drop=FALSE]
} else NULL
if(nErrors > 0L) {
msg <- paste0(c("Errors at sample points: ", notOk), collapse = " ")
message(msg)
} else if(nWarnings > 0L) {
msg <- paste(c("Variance matrix is not positive definite at sample points: ", notOk) ,collapse=" ")
message(msg)
}
if(!is.null(nIgnored) && nrow(nIgnored) > 0L){
message(.makeMessage("Ignored a total of ", nrow(nIgnored), " sample points.","\n"))
}
if(length(ok) == 0L ) {
msg <- .makeMessage("No parameters match the simulation conditions: ","\n")
message(msg)
return(
.qleError(message=msg,
call=match.call(),
xdim=ncol(X),
nsim=nsim,
nWarnings=nWarnings,
nErrors=nErrors,
nIgnored=nIgnored,
error=res)
)
}
# Cholesky decompositions
cvmats <-
if(var.type != "const") {
# covariance decompositions
if(verbose)
cat("Cholesky decompositon of covariance matrices...\n")
vmats <- lapply(res[ok],"[[","V" )
L <- try(varCHOLdecomp(vmats),silent=TRUE)
if(.isError(L)) {
msg <- paste0("Cholseky decomposition failed: ","\n")
message(msg)
stop(.qleError(message=msg,call=match.call(),error=L))
}
ok2 <- which(sapply(L,function(x) !.isError(x)))
if(length(ok2) == 0L){
msg <- paste0("All Cholesky decompositions failed: ","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=L))
} else if(length(ok2) < length(L))
warning(paste0("A total of ",length(L)-length(ok2), " Cholesky decomposition(s) failed."))
cvmats <- as.data.frame(do.call(rbind, L[ok2]))
colnames(cvmats) <- paste("L", rep(1:NCOL(cvmats)),sep="")
# bootstrap variances of covariances
# a local nugget variances
if(Nb > 0 && var.type == "kriging"){
Lb <- lapply(res[ok],"[[","Lb")
Lbmats <- as.data.frame(do.call(rbind,Lb[ok2]))
colnames(Lbmats) <- paste("Lb", rep(1:NCOL(Lbmats)),sep="")
cvmats <- cbind(cvmats,Lbmats)
}
if(length(ok) != length(ok2))
ok <- ok2
cvmats
} else NULL
X <- X[ok,,drop=FALSE]
mstats <- do.call(rbind,lapply(res[ok],"[[","mstat"))
mvars <- do.call(rbind,lapply(res[ok],"[[","vars"))
qld <- as.data.frame(cbind(rbind(X),mstats,mvars))
nms <-
if(is.null(colnames(X)) | is.null(colnames(mstats)) | is.null(colnames(mvars))) {
c(colnames(qld[seq(ncol(X))]),
paste("mean.T",rep(1:NCOL(mstats)),sep=""),
paste("var.T",rep(1:NCOL(mvars)),sep=""))
} else {
c(colnames(X),
paste0("mean.",colnames(mstats)),
paste0("var.",colnames(mvars)))
}
colnames(qld) <- nms
if(!is.null(cvmats)){
qld <- cbind(qld,data.frame(cvmats))
}
structure(qld,
xdim=ncol(X),
nsim=nsim,
Nb=if(var.type != "kriging") 0 else Nb,
nWarnings=nWarnings,
nErrors=nErrors,
nIgnored=nIgnored,
message=msg,
call=match.call(),
class=c("QLdata","data.frame"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.