# Copyright (C) 2017 Markus Baaske. All Rights Reserved.
# This code is published under the GPL (>=3).
#
# File: simQLData.R
# Date: 25/10/2017
# Author: Markus Baaske
#
# Functions to collect the simulation results
# internal
#' @importFrom digest digest
doInParallel <- function(X, FUN, ... , cl = NULL, iseed = NULL,
cache = getOption("qle.cache",FALSE) )
{
SIM <- if(cache) {
for(i in 1:length(X))
attr(X[[i]],"id") <- i
tryCatch({
function(x,...) {
dg <- digest::digest(list(FUN, x, attr(x,"id")))
cache.fn <- sprintf("cached_%s.rda",dg)
if (file.exists(cache.fn)){
load(cache.fn)
} else {
var <- FUN(x,...)
save(var, file = cache.fn)
}
return(var)
}
}, error = function(e) {
structure(
list(message=.makeMessage("Failed to load/save/execute cached results.",
conditionMessage(e)),
call=sys.call()),
class=c("error","condition"), error=e)
})
} else FUN
tryCatch({
noCluster <- is.null(cl)
cores <- getOption("mc.cores",1L)
if(noCluster && (length(X)==1L || cores==1L)){
noCluster <- FALSE
lapply(X, SIM, ...)
} else {
if(noCluster){
type <- if(Sys.info()["sysname"]=="Linux")
"FORK" else "PSOCK"
try(cl <- parallel::makeCluster(cores,type=type),silent=FALSE)
}
if(is.null(cl))
stop("Could not initialize cluster object.")
if(any(class(cl) %in% c("MPIcluster","SOCKcluster","cluster"))){
clusterSetRNGStream(cl,iseed)
parallel::parLapplyLB(cl, X = X, fun = SIM, ...)
} else stop(paste0("Unsupported cluster object: ",class(cl)))
}
},error = function(e) {
return(
structure(
list(message=.makeMessage("Calling `",deparse(substitute(SIM)), "` failed: ",conditionMessage(e)),
call=sys.call()),
class = c("error", "condition"), error = e))
},finally = {
if(noCluster && !is.null(cl)){
if(inherits(try(stopCluster(cl),silent=TRUE),"try-error")){
rm(cl)
message("Error in stopping cluster.")
} else {
cl <- NULL
invisible(gc())
}
}
})
}
#' @name simQLdata
#'
#' @title Simulate the statistical model
#'
#' @description The function runs simulations of the user-defined statistical model
#' either for a single parameter or a list of parameters.
#'
#' @param sim user supplied simulation function
#' @param X list or matrix of model parameters
#' @param ... arguments passed to `\code{sim}`
#' @param nsim number of simulation replications at each parameter
#' @param mode type of return value
#' @param cl cluster object, \code{NULL} (default), of class "\code{MPIcluster}", "\code{SOCKcluster}", "\code{cluster}"
#' @param iseed integer seed for initializing the cluster workers
#' @param na.rm whether to remove `\code{NA}` values from simulation results
#' @param verbose if \code{TRUE}, then print intermediate output
#'
#' @return List of (aggregated) simulation results of class \code{simQL} and the following attributes:
#' \item{X}{ matrix of sample points equal to `\code{X}`, if supplied, or
#' the result of \code{\link{multiDimLHS}}}
#' \item{nsim}{ number of simulation runs at each parameter}
#' \item{iseed}{ an integer seed value to initialize cluster workers}
#' \item{error}{ only in case of errors detected in simulation function}
#'
#' @details Basically, the given simulation function `\code{sim}` is called for each parameter value
#' `\code{nsim}` times and the results are returned depending on the chosen type `\code{mode}`. This is either
#' a list or a matrix as the outcome of function `\code{sim}` or the mean of `\code{nsim}` simulation replications at each parameter.
#'
#' If `\code{X}` equals \code{NULL} (default), then the design, that is, the matrix of sample points, is first generated
#' by function \code{\link{multiDimLHS}} and the result is stored as an attribute named `\code{X}`. In this case, the arguments
#' `\code{N}`,`\code{bounds}` and `\code{method}` must be given as named input arguments which are then passed to \code{\link{multiDimLHS}}.
#' In case of errors catched, the corresponding simulation runs are omitted from the results if possible.
#'
#' We recommend using a cluster object `\code{cl}` depending on the complexity of model simulations. If using a cluster make sure
#' to export all functions to the cluster environment beforehand which are required by the user-defined simulation function.
#' Also, using the option `\code{qle.cache}` caches results of a function call to be stored in the current working directory.
#' The filename is generated by a hash code using the \code{\link[digest]{digest}} package. Setting an integer seed \code{iseed} before
#' stores each result in a separate file and makes the data reproducible while loading the data again.
#'
#' @author M. Baaske
#'
#' @examples
#' # generate design points, simulate and return the sample means
#' sim <- simQLdata(sim=function(x) rlnorm(1,x[1],x[2]),
#' nsim=10,N=8, method="maximinLHS",
#' lb=c(-1.5,0),ub=c(2,1), mode="mean")
#'
#' @importFrom stats na.omit
#' @rdname simQLdata
#' @export
simQLdata <-
function(sim, ..., nsim, X = NULL, mode = c("list","matrix","mean"), cl = NULL,
iseed = NULL, na.rm = getOption("na.rm",TRUE), verbose = FALSE)
{
args <- list(...)
if(is.null(X)) {
my.args <- c("N","method","lb","ub")
id <- which(is.na(pmatch(my.args,names(args))))
if(length(id)>0L)
stop(paste(c("Arguments of `multiDimLHS` ",my.args[id]," not found."),collapse=" "))
# generate random sample
X <- multiDimLHS(args$N,args$lb,args$ub,method=args$method)
args <- args[-pmatch(my.args,names(args))]
} else {
if(!is.list(X))
X <- .ROW2LIST(X)
}
# check on input values
simFun <-
if(!inherits(sim,"simObj")){
sim <- match.fun(sim)
.checkfun(sim,args,remove=TRUE)
structure(
function(x) {
try(do.call(sim,c(list(x),args)))
},
class=c("simObj","function") )
} else sim
# run simulations
stopifnot(!missing(nsim) || is.numeric(nsim))
arg.list <- list(X=lapply(rep(X,each=nsim),unlist),
FUN=simFun, cl=cl, iseed=iseed)
# simulate model
if(verbose)
cat("Simulating the model...\n")
res <- do.call(doInParallel, arg.list)
# check results from user function simulation
if(inherits(res,"error")) {
msg <- .makeMessage("Error in `simQLdata`: ",conditionMessage(res))
message(msg)
return(.qleError(message=msg,call=match.call(),error=res))
}
errId <- which(sapply(res,function(x) .isError(x)))
if(length(errId) > 0L) {
msg <- .makeMessage("Simulation error (first error): ", res[[errId[1]]])
message(msg)
return(.qleError(message=msg,call=match.call(),error=res))
}
np <- length(X)
res <-
if(np > 1)
split(res, cut(seq_along(res),np,labels=FALSE))
else list(res)
mode <- match.arg(mode)
RES <- lapply(res,
function(x) {
# remove errors in results
err <- sapply(x,.isError)
ok <- which(!err)
if(length(ok) == 0L)
return (x)
structure(
switch(mode,
"list" = {
x[ok]
},
"mean"= {
if(na.rm)
apply(na.omit(as.data.frame(do.call(rbind,x[ok]))),2,mean)
else apply(do.call(rbind,x[ok]),2,mean)
},
"matrix" = {
data.matrix(
if(na.rm)
na.omit(as.data.frame(do.call(rbind,x[ok])))
else do.call(rbind,x[ok]))
}),
error = if(sum(err) > 0L) {
id <- which(err)
message(.makeMessage("A total of ",length(id), " errors detected in user defined simulation function."))
structure(x[id],nErr = length(id))
} else NULL)
}
)
names(RES) <- NULL
nErr <- which(sapply(RES, function(x) !is.null(attr(x,"error"))))
structure(RES,
nsim=nsim,
X=.LIST2ROW(X),
iseed=iseed,
error=if(length(nErr) > 0L) nErr else NULL,
class="simQL", call=sys.call())
}
# Cholesky decomposition of covariance matrices
varCHOLdecomp <- function(matList) {
# n <- NROW(matList[[1]])
# lx <- n*(n+1)/2
if(!is.matrix(matList[[1]]))
stop("Elements of list must be matrices.\n", call.=TRUE)
doIt <- function(i,x) {
m <- try(chol(x[[i]],pivot = FALSE), silent=TRUE )
if(inherits(m,"try-error") || anyNA(m) )
return( .qleError(message="Cholseky decomposition failed: ",
call=match.call(),error=m))
as.vector(m[col(m)>=row(m)])
}
lapply(1:length(matList),doIt,x=matList)
}
.resampleCov <- function(T,num){
n <- nrow(T)
apply(
sapply(1:num,
function(i){
TS <- apply(T,2,sample,replace=TRUE)
stopifnot(NROW(TS)==n)
m <- var(TS,na.rm=TRUE)
m[col(m)>=row(m)]
}
),1,var)
}
#' @name setQLdata
#'
#' @title Set quasi-likelihood (QL) data
#'
#' @description Aggregate the data for quasi-likelihood estimation
#'
#' @param runs list or matrix of simulation results from \code{\link{simQLdata}}
#' @param X list or matrix of model parameters
#' @param var.type character, "\code{cholMean}" (default), whether to Cholesky decompose variance
#' matrices either for sample average variance approximation or kriging variance matrices
#' @param Nb numeric, number of bootstrap samples for kriging the variance matrix,
#' only if `\code{var.type}`=`\code{kriging}`, default is zero which uses no bootstrapping
#' @param na.rm if \code{TRUE} (default), remove `NA` values from simulation results
#' @param verbose if \code{TRUE}, print intermediate output
#'
#' @return
#' An object of class \code{QLdata} as a data frame with columns:
#' \item{X}{ Model parameters (\code{n=1},...,\code{q}) }
#' \item{mean}{ Results of simulations (\code{m=1},...,\code{p}) }
#' \item{var}{ Simulation variances of statistics (\code{m=1},...,\code{p}) }
#' \item{L}{ if applicable, Cholesky decomposed terms of variance matrices of statistics (k=1,...,(m*(m+1)/2))}
#' where `\code{p}` denotes the number of user defined statistics and `\code{q}` the problem dimension, that is,
#' the number of parameters to be estimated.
#'
#' The following items are stored as attributes:
#'
#' \item{type}{ see above}
#' \item{nsim}{ number of simulations at each point }
#' \item{xdim}{ parameter dimension}
#' \item{nWarnings}{ Number of warnings during simulations}
#' \item{nErrors}{ Number of errors during simulations}
#' \item{nIgnored}{ List of parameters ignored (because of failures)}
#'
#' @details
#' The function aggregates all neccessary data for quasi-likelihood estimation storing the
#' sample locations and the corresponding simulation results of the statistics.
#' If `\code{X}` equals \code{NULL}, then the sample points are taken from the object `\code{runs}`.
#'
#' The most critical part is the decomposition of variance matrices for each sample location unless `\code{var.type}`
#' equals "\code{const}" in which case a constant variance matrix approximation is expected later by function \code{\link{qle}}.
#' The Cholesky decompositions are used for average approximations of the variance matrix of the statistics when calculating the
#' quasi-score vector or any type of function criterion based on the Mahalanobis distance or quasi-deviance.
#' If these fail for any reason we try to ignore, if possible, the corresponding sample points and exclude them
#' from all following calculations. Unless a constant estimate of the variance matrix, the default is to approximate the
#' variance at any model parameter by either a kriging interpolation of the \emph{Cholesky} terms or as an average over
#' all sampled variance matrices also based on the decomposed Cholesky terms (see vignette).
#'
#' @examples
#' # simulate model statistics at LHS design
#' sim <- simQLdata(sim =
#' function(x,cond) {
#' X <- rlnorm(cond$n,x[1],x[2])
#' c("MED"=median(X),"MAD"=mad(X))
#' },
#' cond=list("n"=10),
#' nsim=10, N=10, method="maximinLHS",
#' lb=c("mu"=-1.5,"sd"=0), ub=c("mu"=2,"sd"=1))
#'
#' # setup the QL data model using defaults
#' qldata <- setQLdata(sim,verbose=TRUE)
#'
#' @author M. Baaske
#' @rdname setQLdata
#' @export
setQLdata <- function(runs, X = NULL, var.type="cholMean",
Nb = 0, na.rm = TRUE, verbose = FALSE) {
nErrors <- 0
nWarnings <- 0
nsim <- attr(runs,"nsim")
stopifnot(!is.null(nsim) || is.numeric(nsim))
if(class(runs) != "simQL" || !is.list(runs))
stop("Argument `runs` must be of class `simQL` and `list` type.")
if(is.null(X))
X <- attr(runs,"X")
if(!is.matrix(X))
X <- .LIST2ROW(X)
.extract <- function(dataT) {
if( !is.null(attr(dataT,"error")) || inherits(dataT,"error")) {
msg <- paste0("Value of statistics has errors: ","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=dataT))
}
if(is.list(dataT))
dataT <- do.call(rbind,dataT)
if(na.rm)
dataT <- dataT[!rowSums(is.na(dataT)) > 0L,,drop=FALSE]
V <- try( var(dataT, na.rm=na.rm),silent=TRUE)
if (is.na(V) || !is.numeric(V) || inherits(V,"try-error") ) {
msg <- paste0("Failed to get variance matrix of statistics.","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=V))
}
is.pos <- .isPosDef(V)
if(is.pos != 0L) {
warning("Covariance matrix not positive definite!")
}
vmat <- as.matrix(V)
ret <- list("V" = vmat,
"mstat" = colMeans(dataT),
"vars" = diag(vmat),
"is.pos" = is.pos)
if(Nb > 0 && var.type == "kriging"){
ret$Lb <- try(.resampleCov(dataT,Nb),silent=TRUE)
if(inherits(ret$Lb,"try-error"))
attr(ret,"error") <- TRUE
}
ret
}
res <- tryCatch(
lapply(runs,.extract),
error = function(e) e)
if(.isError(res)){
msg <- .makeMessage("Extracting values of statistics failed.")
message(msg)
return(.qleError(message=msg,call=sys.call(),error=res))
}
ok <- which(
sapply(res,
function(x){
if(.isError(x)) {
ok <- FALSE # try to ignore this location
nErrors <<- nErrors+1
} else {
ok <- (x$is.pos == 0L)
if(!ok)
nWarnings <<- nWarnings+1
}
ok
}))
notOk <- integer(0)
msg <- "Normal completion"
nIgnored <-
if(length(ok) < length(res)) {
notOk <- which(!(1:length(res) %in% ok))
X[notOk,,drop=FALSE]
} else NULL
if(nErrors > 0L) {
msg <- paste0(c("Errors at sample points: ", notOk), collapse = " ")
message(msg)
} else if(nWarnings > 0L) {
msg <- paste(c("Variance matrix is not positive definite at sample points: ", notOk) ,collapse=" ")
message(msg)
}
if(!is.null(nIgnored) && nrow(nIgnored) > 0L){
message(.makeMessage("Ignored a total of ", nrow(nIgnored), " sample points.","\n"))
}
if(length(ok) == 0L ) {
msg <- .makeMessage("No parameters match the simulation conditions: ","\n")
message(msg)
return(
.qleError(message=msg,
call=match.call(),
xdim=ncol(X),
nsim=nsim,
nWarnings=nWarnings,
nErrors=nErrors,
nIgnored=nIgnored,
error=res)
)
}
# Cholesky decompositions
cvmats <-
if(var.type != "const") {
# covariance decompositions
if(verbose)
cat("Cholesky decompositon of covariance matrices...\n")
vmats <- lapply(res[ok],"[[","V" )
L <- try(varCHOLdecomp(vmats),silent=TRUE)
if(.isError(L)) {
msg <- paste0("Cholseky decomposition failed: ","\n")
message(msg)
stop(.qleError(message=msg,call=match.call(),error=L))
}
ok2 <- which(sapply(L,function(x) !.isError(x)))
if(length(ok2) == 0L){
msg <- paste0("All Cholesky decompositions failed: ","\n")
message(msg)
return(.qleError(message=msg,call=match.call(),error=L))
} else if(length(ok2) < length(L))
warning(paste0("A total of ",length(L)-length(ok2), " Cholesky decomposition(s) failed."))
cvmats <- as.data.frame(do.call(rbind, L[ok2]))
colnames(cvmats) <- paste("L", rep(1:NCOL(cvmats)),sep="")
# bootstrap variances of covariances
# a local nugget variances
if(Nb > 0 && var.type == "kriging"){
Lb <- lapply(res[ok],"[[","Lb")
Lbmats <- as.data.frame(do.call(rbind,Lb[ok2]))
colnames(Lbmats) <- paste("Lb", rep(1:NCOL(Lbmats)),sep="")
cvmats <- cbind(cvmats,Lbmats)
}
if(length(ok) != length(ok2))
ok <- ok2
cvmats
} else NULL
X <- X[ok,,drop=FALSE]
mstats <- do.call(rbind,lapply(res[ok],"[[","mstat"))
mvars <- do.call(rbind,lapply(res[ok],"[[","vars"))
qld <- as.data.frame(cbind(rbind(X),mstats,mvars))
nms <-
if(is.null(colnames(X)) | is.null(colnames(mstats)) | is.null(colnames(mvars))) {
c(colnames(qld[seq(ncol(X))]),
paste("mean.T",rep(1:NCOL(mstats)),sep=""),
paste("var.T",rep(1:NCOL(mvars)),sep=""))
} else {
c(colnames(X),
paste0("mean.",colnames(mstats)),
paste0("var.",colnames(mvars)))
}
colnames(qld) <- nms
if(!is.null(cvmats)){
qld <- cbind(qld,data.frame(cvmats))
}
structure(qld,
xdim=ncol(X),
nsim=nsim,
Nb=if(var.type != "kriging") 0 else Nb,
nWarnings=nWarnings,
nErrors=nErrors,
nIgnored=nIgnored,
message=msg,
call=sys.call(),
class=c("QLdata","data.frame"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.