R/krige.R

# Copyright (C) 2018 Markus Baaske. All Rights Reserved.
# This code is published under the GPL (>=3).
#
# File: 	krige.R
# Date:  	14/03/2018
# Author: 	Markus Baaske
#
# Implements kriging prediction, variance approximation methods,
# quasi-deviance and Mahalanobis distance function as a criterion
# for QL estimation

#' @name estim
#'
#' @title Kriging prediction and numerical approximation of derivatives
#'
#' @description Kriging-based prediction of the sample means of statistics and numerical
#'  approximation of their derivatives (Jacobians) by forward differences.
#'
#' @param models 	object of class \code{krige}, a list of covariance models, or of
#' 	 				class \code{covModel}, a single covariance model, see \code{\link{setCovModel}}
#' @param points 	matrix or list of points to predict the sample means of statistics
#' @param Xs		matrix of sample points
#' @param data		data frame of sample means of statistics at sampled points
#' @param krig.type name of kriging type, one of "\code{dual}" (default), "\code{var}" or "\code{both}"
#'
#' @return
#'  \item{estim}{ list of predicted values of sample means of statistics (including prediction
#' 		 variances if `\code{krig.type}` equals "\code{var}")}
#'  \item{jacobian}{ list of Jacobians at predicted values of sample means of statistics}  
#'
#' @details The function can be used to predict any values by kriging given a covariance model. Each covariance model is given as an
#'  element of the list `\code{models}` including its own trend model and covariance function name. There are two types of kriging predictors
#'  available: first, the \emph{dual kriging} predictor, set by `\code{krig.type}`="\code{dual}", and second, the one based on the computation
#'  of prediction variances, set by `\code{krig.type}`="\code{var}". Both types yield exactly the same predicted values and only differ in
#'  whether or not kriging variances are computed. The data, e.g. the sample means of each statistic, must be given as column vectors where
#'  each row corresponds to a sample point in the data frame given in the argument `\code{data}`. 
#'
#' @examples 
#' data(normal) 
#' 
#' X <- as.matrix(qsd$qldata[,1:2])
#' p <- c("mu"=2,"sd"=1)
#' 
#' # get simulated statistics at design X
#' Tstat <- qsd$qldata[grep("^mean[.]",names(qsd$qldata))]
#' 
#' # low level prediction, variances and weights
#' estim(qsd$covT,p,X,Tstat,krig.type="var")
#' 
#' # Jacobian 
#' jacobian(qsd$covT,p,X,Tstat)
#'  
#'    
#' @author M. Baaske
#' @rdname estim
#' @export
estim <- function(models, points, Xs, data,
		  krig.type=c("dual","var","both")) {   
	UseMethod("estim",models)
}

#' @method estim krige 
#' @export
estim.krige <- function(models, points, Xs, data,
				krig.type = c("dual","var","both")) { 
	krig.type <- match.arg(krig.type)
		
	if(!is.list(data) || length(data)!=length(models))
		stop("Expected 'data' as  a list of same length as 'models'.")	 		 	 
	
	if(!is.matrix(points))
		points <- .LIST2ROW(points)
	
	.Call(C_kriging,Xs,data,points,models,krig.type)
}

#' @method estim covModel
#' @export 
estim.covModel <- function(models, points, Xs, data,
					krig.type=c("dual","var","both")) {	
	krig.type <- match.arg(krig.type)		
	if(!is.matrix(points))
		points <- .LIST2ROW(points)
	
	.Call(C_kriging, Xs, as.data.frame(data), points, list(models), krig.type)
}

#' @title Jacobian
#'
#' @description Jacobian of mean values of statistics 
#' 
#' @inheritParams estim
#'
#' @details
#'   The function `\code{jacobian}` computes, by forward difference approximations, the partial derivatives
#'   of the sample means of the statistics, with the statistics as columns and the components of the parameter vector as rows.
#' 
#' @rdname estim
#' 
#' @export
jacobian <- function(models, points, Xs, data,
				krig.type=c("dual","var","both")) {
	UseMethod("jacobian",models)
}

#' @method jacobian krige 
#' @export
jacobian.krige <- function(models, points, Xs, data,
					krig.type=c("dual","var","both")) {
	krig.type <- match.arg(krig.type)	
	if(!is.list(data) || length(data)!=length(models))
		stop("Expected \'data\' as  a list of same length as 'models'.")	
	if(!is.list(points))
		points <- .ROW2LIST(points)	
	.Call(C_estimateJacobian,Xs,data,points,models,krig.type)
}

#' @method jacobian covModel 
#' @export
jacobian.covModel <- function(models, points, Xs, data,
						krig.type=c("dual","var","both")) {
	krig.type <- match.arg(krig.type)	
	if(!is.list(points))
	  points <- .ROW2LIST(points)
  
	.Call(C_estimateJacobian,Xs,as.data.frame(data),points,list(models),krig.type)
}

#' @title Kriging the sample means of statistics
#'
#' @description
#'  \describe{
#'    \item{\code{predictKM}}{ wrapper for kriging the sample means of statistics} 	  
#'	  \item{\code{varKM}}{ calculates the kriging prediction variances}
#'	  \item{\code{extract}}{ extracts the results of kriging} 
#'  } 
#' 
#' @details For a list of fitted covariance models the function \emph{predictKM} predicts the
#' 	 sample means of statistics at (unsampled) points, \emph{varKM} calculates the prediction variances, if applicable,
#'   at these points, and \emph{extract} extracts the results. Note that, since we aim at predicting the "error free" values of the sample means,
#'   we use the \emph{smoothing} kriging predictor as described in [2, Sec. 3.7.1].
#'
#' @param models   list of covariance models, see \code{\link{setCovModel}}
#' @param ... 	   further arguments passed to function \code{\link{estim}}
#' 
#' @return \item{predictKM}{ list of kriging predicted values}
#' 
#' @examples 
#' data(normal)
#' X <- as.matrix(qsd$qldata[,1:2])
#' p <- c("mu"=2,"sd"=1)
#' 
#' # get simulated statistics at design X
#' Tstat <- qsd$qldata[grep("^mean[.]",names(qsd$qldata))]
#' 
#' # predict and extract 
#' predictKM(qsd$covT,p,X,Tstat)
#' 
#' # prediction variances
#' varKM(qsd$covT,p,X,Tstat)
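#' 
#' # low-level alternative (sketch): extract results from `estim` directly;
#' # type="weights" additionally returns dual kriging weights, if attached
#' km <- estim(qsd$covT,p,X,Tstat,krig.type="var")
#' extract(km,type="mean")
#' extract(km,type="sigma2")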
#' 
#' @rdname krige
#' @export
predictKM <- function(models,...) {
	km <- estim(models,...)
	if(.isError(km)) {
	  message("Kriging estimation (mean) failed.\n")
	  return(km)
  	}
	extract(km, type="mean")
}

#' @return \item{varKM}{ list of kriging prediction variances}
#' 
#' @rdname krige
#' @export
varKM  <- function(models, ...) {
	km <- estim(models, ..., krig.type="var")
	if(.isError(km)) {
	  message("Kriging prediction error estimation (var) failed.\n")
	  return(km)
	}
 	extract(km, type="sigma2")
}


#' @param X	 	kriging result
#' @param type  return type of results, see details below
#'
#' @details
#'  The function \emph{extract} either returns the predicted values, the
#'  prediction variances or the kriging weights for each point. 
#'
#' @return \item{\code{extract}}{ matrix of corresponding values (see details)}  
#'
#' @rdname krige
#' @export
extract <- function(X, type = c("mean","sigma2","weights")) UseMethod("extract",X)

#' @method extract krigResult 
#' @export
extract.krigResult <- function(X, type=c("mean","sigma2","weights")) {
	type <- match.arg(type)
	switch(type,
	   "weights" = { 
		   if(!is.null(attr(X,"weights")))		# only for dual kriging weights!
			 return( t(do.call(rbind,(attr(X,"weights")))) )
		   lapply(X,function(x) {
			 w <- matrix(unlist(x$weights),nrow=length(x$weights[[1]]))
			 colnames(w) <- names(x$weights)
			 w
		 	})	
	   },
	   do.call(rbind,lapply(X,"[[",type))
	)
}

# intern: computes the matrix logarithm of each simulated variance matrix,
# reconstructed row-wise from its Cholesky-decomposed terms given in `L`
#' @importFrom expm logm
varLOGdecomp <- function(L) {
	vmats <- try(lapply(1L:nrow(L), function(i) .chol2var(unlist(L[i,]))), silent=TRUE)
	if(inherits(vmats,"try-error")) {
	  return(simpleError(.makeMessage("Matrix logarithm of covariance matrices failed.")))
	}
	decomp <- lapply(vmats,
			   function(Xs) {
				   m <- try(expm::logm(Xs,method="Eigen"),silent=TRUE)
				   if(inherits(m,"try-error"))
					return (m)
				   as.vector(m[col(Xs)>=row(Xs)])
				 }
		       )
	as.data.frame(matrix(unlist(do.call(rbind,decomp)), ncol=length(decomp[[1L]])))
}

#' @title Variance matrix approximation
#'
#' @description Approximating the variance-covariance matrix of statistics
#'
#' @param qsd		object of class \code{\link{QLmodel}}
#' @param W			weight matrix for weighted average approximation
#' @param theta	    parameter vector for weighted average approximation 
#' @param cvm		list of fitted cross-validation models, see \code{\link{prefitCV}}
#' @param doInvert	if \code{TRUE}, return the inverse of the variance matrix approximation
#'
#' @return
#' 	List of variance matrices with the following structure:
#' 	\item{VTX}{ variance matrix approximation}
#'  \item{sig2}{kriging prediction variances of statistics at `\code{theta}`}
#'  \item{var}{ matrix `\code{VTX}` with added variances `\code{sig2}` to the diagonal terms}
#'  \item{inv}{ if applicable, the inverse of either `\code{VTX}` or `\code{var}`}
#'
#' @details	The function estimates the variance matrix of statistics at some (unsampled) point `\code{theta}` by either
#'  averaging (the \emph{Cholesky} decomposed terms or matrix logarithms) over all simulated variance matrices
#'  of statistics at previously evaluated points of the parameter space or by a kriging approach which treats the Cholesky
#'  decomposed terms of each variance matrix as a data vector for kriging.
#' 
#'  In addition, a Nadaraya-Watson kernel-weighted average approximation can also be applied in order to bias the variance
#'  estimation towards a more locally weighted estimation, where smaller weights are assigned to points being more
#'  distant to an estimate of the model parameter `\code{theta}`. A reasonable symmetric weighting matrix 
#'  `\code{W}` of size equal to the problem dimension, say \code{q}, can be freely chosen by the user. Moreover, the user can select
#'  different types of variance averaging methods such as "\code{cholMean}", "\code{wcholMean}", "\code{logMean}", "\code{wlogMean}" and
#'  "\code{kriging}" (kriging the variance matrix, adding three times the prediction standard error to each Cholesky entry),
#'  defined by `\code{qsd$var.type}`, where the prefix "\code{w}" indicates the corresponding weighted version of
#'  the approximation type. If `\code{theta}` is not given, then no prediction variances are included and the variance matrix estimate of the
#'  statistics only covers the variances due to simulation replications and not those due to the kriging approximation of the statistics.
#'  Otherwise, the mean variance matrix estimate is given by
#'  \deqn{ \hat{V}+\textrm{diag}(\sigma(\theta)),} where \eqn{\hat{V}} denotes one of the above variance approximation types.
#' 
#'  The prediction variances \eqn{\sigma} are either derived from the kriging results of statistics or based on a (possibly more robust)
#'  cross-validation (CV) approach, see vignette for details.
#'    
#' @examples 
#'  data(normal)
#'  # average approximation of variance matrices
#'  covarTx(qsd,theta=c("mu"=2,"sd"=1))
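#' 
#'  # weighted version (sketch): `W` is a freely chosen symmetric weighting
#'  # matrix; it only takes effect for the weighted types, e.g. "wcholMean"
#'  covarTx(qsd,W=diag(2),theta=c("mu"=2,"sd"=1))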
#' 
#' @author M. Baaske
#' @rdname covarTx
#' @export
covarTx <- function(qsd, W = NULL, theta = NULL, cvm = NULL, doInvert = FALSE)
{		
	xdim <- attr(qsd$qldata,"xdim")
	Xs <- as.matrix(qsd$qldata[seq(xdim)])
	
	nstat <- length(qsd$covT)
   	var.type <- qsd$var.type
	krig.type <- qsd$krig.type
	Tnames <- names(qsd$obs)
	dataT <- qsd$qldata[(xdim+1L):(xdim+nstat)]
	dataL <- qsd$qldata[(xdim+2*nstat+1L):ncol(qsd$qldata)]					# Cholesky decomposed terms
	nc <- ncol(dataL)
		
	sig2 <-
	 if(!is.null(theta)) {
		 if(!is.null(cvm)) {		  
		 	tryCatch({							
				Y <- estim(qsd$covT,theta,Xs,dataT,krig.type="var")
				# cross-validation variance/RMSE of statistics
				cverrorTx(theta,Xs,dataT,cvm,Y,"cve")		
			 }, error = function(e) { e })
		 
		 } else if((krig.type == "var")) {
		       try(varKM(qsd$covT,theta,Xs,dataT),silent=TRUE)
		 } 
	} else NULL 
	
	 
	if(.isError(sig2)) {
		message(.makeMessage("Failed to get prediction variances. "),
				if(inherits(sig2,"error")) conditionMessage(sig2))		
		sig2 <- NULL
	}	
	if(!is.null(W)) {
		if(!is.matrix(W))
		 stop("`W` has to be a matrix.")
		stopifnot(nrow(W)==ncol(W) && nrow(W)==xdim)
	}
	
	tryCatch({
		## get list of covariance matrices
		## one for each prediction point
		if(var.type %in% c("logMean","wlogMean")) {
			mlogV <- try(varLOGdecomp(dataL),silent=TRUE)
			if(.isError(mlogV)) {
				msg <- paste0("Matrix logarithm failed.")
				message(msg)
				return(.qleError(message=msg,call=match.call(), error=mlogV ) )
			}
			err <- unlist(lapply(mlogV, function(x) .isError(x)))
			if(any(err)) {
				msg <- paste0("Matrix logarithm failed: ")
				message(msg)
				return(.qleError(message=msg,call=match.call(),error=mlogV))
			}			
			if(var.type=="logMean" || is.null(W) || is.null(theta))
			  return (varCHOLmerge(rbind(colMeans(mlogV)),sig2, var.type, doInvert))
		    		  	
			d <- try(exp(-.distX(Xs,rbind(theta),W)*0.5),silent=TRUE)
			if(inherits(d,"try-error") || !is.numeric(d) || any(is.na(d))) {
			   msg <- paste0("Weighted distances calculation error: NAs values possibly produced.")		
			   message(msg)
			   return(.qleError(message=msg,call=match.call(),error=d))
			}
			varCHOLmerge(rbind(colSums(mlogV*matrix(rep(d,nc),nrow(dataL),nc))/sum(d)),
					sig2,var.type,doInvert,Tnames)
			
		} else if(var.type %in% c("cholMean","wcholMean")) {
			if(var.type=="cholMean" || is.null(W) || is.null(theta))
			  return (varCHOLmerge(rbind(colMeans(dataL)),sig2,var.type,doInvert,Tnames))	
			
			# weighting matrix has to be inverted!
			# but inverse of QI as variance of theta inverted is QI, thus W = I.
			d <- try(exp(-.distX(Xs,rbind(theta),W)*0.5),silent=TRUE)
			if(inherits(d,"try-error") || !is.numeric(d) || any(is.na(d))) {
				msg <- paste0("Weighted distances calculation error: NAs values possibly produced.")
				message(msg)
				return(.qleError(message=msg,call=match.call(),error=d))
			} 				
			varCHOLmerge(rbind(colSums(dataL*matrix(rep(d,nc),nrow(dataL),nc))/sum(d)),
					sig2,var.type,doInvert,Tnames)
		} else if(var.type == "kriging") {			
			## the following krigin of variance matrix is only used for
			## functions in R code, any other code calling C does its own
			## kriging more efficiently
			if(is.null(qsd$covL) || is.null(theta))
			  stop("Kriging the variance matrix requires arguments `qsd$covL` and `theta` to be given.")			
					  	
			# nCovL: number of Cholesky decomposed terms (excluding bootstrap variances)		
			L <- estim(qsd$covL,theta,Xs,dataL[1:length(qsd$covL)],krig.type="var")			
			Lm <- do.call(rbind,sapply(L,"[","mean"))
			Lsig <- try(sqrt(do.call(rbind,sapply(L,"[","sigma2"))),silent=TRUE)
			if(inherits(Lsig, "try-error") || anyNA(Lsig))
			 stop("Could not extract Kriging variances of variance matrix interpolation models.")		 	
			VTX <- varCHOLmerge(Lm,NULL,var.type,doInvert,Tnames)		
			lapply(seq_len(NROW(Lm)),
				function(i) {
					Sig2 <- try(.chol2var(as.numeric(Lsig[i,])),silent=TRUE)
					if(inherits(Sig2,"try-error")){
					    msg <- paste0("'chol2var' failed to compute the kriging variance of (kriging) variance matrix models.")
						message(msg)
						return(.qleError(message=msg,error=Sig2))
				  	}
				    V <- VTX[[i]]
					res <- 
					 structure(
					  list("VTX"=V$VTX,
						   "var"=V$VTX+diag(diag(Sig2),nstat), 	# with kriging variance of statistics mean 'sig2'				   				  
						   "sig2"=diag(Sig2))									# a matrix: kriging variances of variance matrix models
		   			  )					  
					  if(doInvert) {
						  res$inv <- try(do.call("gsiInv",list(res$var)),silent=TRUE)
						  if (inherits(res$inv,"try-error") || !is.numeric(res$inv) || any(is.na(res$inv)))
						   message("Variance matrix inversion failed for kriging the variance matrix.")			
					  }
					  res
				}
			)
		} else {			
			stop("Variance matrix of statistics cannot be computed.")
		}	
	}, error = function(e) {
		msg <- paste0("Covariance matrix estimation failed: ", conditionMessage(e),".\n") 
		message(msg)
	    return(.qleError(message=msg,call=match.call(),error=e))
	   }
	)
}

# helpers (intern)
# Reconstruct a variance matrix from the (column-major) vector `Xs` of its
# upper-triangular Cholesky terms: returns t(R) %*% R for the factor R.
.chol2var <- function(Xs) {
	# recover the matrix dimension n from the triangular length n*(n+1)/2
	n <- (-1 + sqrt(1 + 8*length(Xs)))/2
	m <- matrix(0,n,n)
	m[col(m)>=row(m)] <- Xs
	return( crossprod(m) )
}

# Rebuild the upper-triangular Cholesky factor itself.
.chol2Upper <- function(Xs) {
	n <- (-1 + sqrt(1 + 8*length(Xs)))/2
	m <- matrix(0,n,n)
	m[col(m)>=row(m)] <- Xs
	return( m )
}

# Rebuild a full symmetric matrix from its upper-triangular terms.
.mergeMatrix <- function(Xs) {
	n <- (-1 + sqrt(1 + 8*length(Xs)))/2
	m <- matrix(0,n,n)
	m[col(m)>=row(m)] <- Xs
	m[lower.tri(m)] <- t(m)[lower.tri(m)]
	return( m )
}
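
# Illustrative sketch (comments only): the helpers invert the vectorization
# used throughout this file. For an upper-triangular Cholesky factor R stored
# column-major as `v` (a hypothetical 2x2 example):
#
#   A <- matrix(c(2,1,1,2),2,2)     # some SPD matrix
#   R <- chol(A)
#   v <- R[col(R) >= row(R)]        # column-major upper triangle
#   all.equal(.chol2Upper(v), R)    # TRUE
#   all.equal(.chol2var(v), A)      # TRUE, since t(R) %*% R = A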

# sig2 is matrix: rows are kriging variance => put as diagonal matrix
varCHOLmerge <- function(Xs, sig2=NULL,var.type="cholMean",doInvert=FALSE,Tnames = NULL) UseMethod("varCHOLmerge",Xs)
varCHOLmerge.matrix <- function(Xs,sig2=NULL,var.type="cholMean", doInvert=FALSE,Tnames = NULL) {
	if(!is.null(sig2) && is.matrix(sig2) )
		structure(lapply(seq_len(NROW(sig2)),
			function(i) varCHOLmerge(Xs[1L,],sig2[i,],var.type,doInvert,Tnames) ),"var.type"=var.type)
	else structure(list(varCHOLmerge(Xs[1L,],NULL,var.type,doInvert,Tnames)),"var.type"=var.type)
}

# intern
#' @importFrom expm expm
varCHOLmerge.numeric <- function(Xs, sig2=NULL, var.type="cholMean", doInvert=FALSE, Tnames = NULL) {
   err <- 
	  function(e) {
			message(.makeMessage("Trying to invert again...\n"))
			tmp <- try(do.call(gsiInv,list(varMat)),silent=TRUE)
			if (!is.numeric(tmp) || !is.matrix(tmp) || any(is.na(tmp))) {
				msg <- .makeMessage(paste0("Matrix inversion error: "),conditionMessage(e))
				message(msg)
			    return (.qleError(message=msg, call=sys.call(), error=e))
		 	}
			return (tmp)
	  }	
	VTX <- try({
	 if(var.type %in% c("cholMean","wcholMean","kriging")) 
		.chol2var(Xs)
	 else expm::expm(.mergeMatrix(Xs))		
	}, silent=TRUE)		
	if(inherits(VTX,"try-error"))
	  stop(paste0("Failed to merge covariance matrix by: ",var.type))
  	if(!is.null(Tnames))
     dimnames(VTX) <- list(Tnames,Tnames)
    res <- list("VTX"=VTX)

	if(!is.null(sig2)) {
		n <- length(sig2)
		stopifnot(nrow(VTX)==n)
		res$sig2 <- sig2
		res$var <- VTX + diag(sig2,n,n)

		if(doInvert) {
			res$inv <- try(do.call("gsiInv",list(res$var)),silent=TRUE)
			if (inherits(res$inv,"try-error") || !is.numeric(res$inv) || any(is.na(res$inv)))
			  return(.qleError(message="Variance matrix inversion failed: ",call=sys.call(),error=res))			
		}
	} else {
		if(doInvert) {
			if(var.type %in% c("logMean","wlogMean")) {
				minv <- try( expm::expm(-.mergeMatrix(Xs)),silent=TRUE)
				if(.isError(minv)){
					msg <- paste0("Merge matrix (logarithm) failed: ")
					message(msg)
					return(.qleError(message=msg,call=sys.call(),error=minv,Xs))
				}
				res$inv <- minv
		 	} else {
				minv <-
					tryCatch({
					  varMat <- .chol2Upper(Xs)
					  do.call("chol2inv",list(varMat))
					}, error = err)
				if(.isError(minv))
				  return(.qleError(message="Not a matrix: ",call=sys.call(),error=minv))
				res$inv <- minv
			}
		}
	}	
	return(res)
}

#' @name quasiDeviance
#'
#' @title Quasi-deviance computation
#'
#' @description
#'  The function computes the quasi-deviance (QD) at parameters (called points) of the parameter
#'  search space including the computation of the quasi-score vector and optionally its variance matrix.   
#'
#' @param points		list or matrix of points at which to compute the QD (a numeric vector is treated as a single (multidimensional) point)
#' @param qsd		    object of class \code{\link{QLmodel}} 
#' @param Sigma		    variance matrix estimate of statistics (see details)
#' @param ...		    further arguments passed to \code{\link{covarTx}}
#' @param cvm			list of cross-validation models (see \code{\link{prefitCV}})
#' @param obs	 	    numeric vector of observed statistics, this overwrites `\code{qsd$obs}`, if supplied
#' @param inverted 		logical, \code{FALSE} (default), currently ignored
#' @param w			    numeric value, \code{=0.5} (default) as scalar weight, \code{0<=w<=1}, for evaluation of candidate points
#' @param check			logical, \code{TRUE} (default), whether to check input arguments
#' @param value.only  	if \code{TRUE} only the values of the QD are returned
#' @param na.rm 		logical, if \code{TRUE} (default) remove `NA`s from the result
#' @param cl			cluster object, \code{NULL} (default), of class \code{MPIcluster}, \code{SOCKcluster}, \code{cluster}
#' @param verbose   	logical, \code{TRUE} for intermediate output
#'
#' @return Numeric vector of QD values, if values only, or a list with elements:
#' \item{value}{ quasi-deviance value}
#' \item{par}{ parameter estimate}
#' \item{I}{ quasi-information matrix}
#' \item{score}{ quasi-score vector}
#' \item{jac}{ Jacobian of sample average statistics}
#' \item{varS}{ estimated variance of quasi-score, if applicable}
#' \item{Iobs}{ observed quasi-information}
#' \item{qval}{ quasi-deviance using the inverse of `\code{varS}` as a weighting matrix}
#' 
#'  The matrix `\code{Iobs}` is called the \emph{observed quasi-information} (see [2, Sec. 4.3]),
#'  which, in our setting, can be calculated at least numerically as the Jacobian of the quasi-score vector.
#'  Further, `\code{varS}` denotes the approximate variance-covariance matrix of the quasi-score given the observed
#'  statistics and serves as a measure of estimation precision (see [1] and the vignette, Sec. 3.2).
#'   
#' @details The function calculates the QD (see [1]). It is the primary criterion function to be minimized
#'   for estimating the unknown model parameter by \code{\link{qle}} and involves the computation of the quasi-score
#'   and quasi-information matrix at a particular parameter. From a statistical point of view, the QD can be seen as
#'   a generalization of the \emph{efficient score statistic} (see [3] and the vignette) and is used as a decision
#'   rule in the estimation function \code{\link{qle}} in order to hypothesise about the true model parameter. A modified value of
#'   the QD, which uses the inverse of the variance of the quasi-score (w.r.t. the kriging approximation models of the statistics)
#'   as a weighting matrix, is stored in the result `\code{qval}`.
#'    
#'   Quasi-deviance values which are relatively small (compared to the empirical quantiles of its approximate chi-squared
#'   distribution) suggest a solution to the quasi-score equation and hence could identify the unknown model parameter
#'   in some probabilistic sense. Estimated parameters including different observed statistics can be investigated by 
#'   a MC goodness-of-fit test, see \code{\link{qleTest}}.
#' 
#'   Further, if we use a weighted variance average approximation of statistics (see \code{\link{covarTx}}),
#'   then the QD value is calculated rather locally w.r.t. an estimated parameter `\code{theta}`. A constant variance
#'   matrix is also applicable to the computation of the QD. However, if supplied, `\code{Sigma}` is used
#'   as is with kriging variances added at the `\code{points}` as diagonal terms (see also \code{\link{mahalDist}}).    
#' 
#' 	 \subsection{Use of prediction variances}{ 
#' 	 In order to not only account for the simulation variance but additionally for the approximation error of the
#'   quasi-score vector we include the prediction variances of the involved statistics either based on
#'   a cross-validation or kriging approach. If `\code{cvm}` is not given, then the prediction variances are obtained based
#'   on the kriging procedure applied to the statistics. The variance matrix `\code{varS}` of the
#'   quasi-score vector is part of the return list. Besides the quasi-information matrix the observed quasi-information matrix
#'  (as a numerically derived Jacobian, given by `\code{Iobs}`, of the quasi-score vector) is also returned. A good match between
#'   those two matrices suggests an approximate root if the corresponding QD value is relatively small. This can be further investigated
#'   using the function \code{\link{checkMultRoot}}.
#' 
#'   Alternatively, also CV-based prediction variances (which involve additional covariance models given by `\code{cvm}` for each left out
#'   sample point) for each single statistic can be used to produce relatively robust parameter estimation results, but at the price of
#'   much higher computational costs. In practice this might overcome the general tendency of the plug-in kriging predictor to underestimate
#'   the prediction variances of the sample means of the statistics. In particular, the CV approach is recommended in case one favours the
#'   kriging type of approximation of the variance matrix. 
#'   }
#' 
#' 
#' @examples
#' data(normal)
#' quasiDeviance(c(2,1), qsd)
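#' 
#' # inspect components of the full result (first point); the names
#' # follow the return value described above
#' qd <- quasiDeviance(c(2,1), qsd)[[1]]
#' qd$value   # quasi-deviance value
#' qd$score   # quasi-score vector
#' qd$I       # quasi-information matrix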
#'  
#' @author M. Baaske
#' @rdname quasiDeviance
#' @export
quasiDeviance <- function(points, qsd, Sigma = NULL, ..., cvm = NULL, obs = NULL, 
					 inverted = FALSE, check = TRUE, w = 0.5, value.only=FALSE, na.rm = TRUE,
					  cl = NULL, verbose=FALSE)
{		
	if(check)
	 .checkArguments(qsd,Sigma=Sigma) 
 	stopifnot(is.numeric(w))
	if(!is.list(points))
	 points <- .ROW2LIST(points) 	
 	X <- as.matrix(qsd$qldata[seq(attr(qsd$qldata,"xdim"))])
 	
	# overwrite (observed) statistics	
	if(!is.null(obs)) {
		obs <- unlist(obs)
		if(anyNA(obs) || any(!is.finite(obs)))
			warning("`NA` or `Inf` values detected in `obs`.")
		if(!is.numeric(obs) || length(obs) != length(qsd$covT))
		 stop("`obs` must be a (named) `numeric` vector or list of length equal to the given statistics in `qsd`.")
		qsd$obs <- obs
	}
	# Unless Sigma is given always continuously update variance matrix.
	# If using W, theta or Sigma for average approximation, then
	# at least update added kriging prediction variances at each point.
	tryCatch({				
		if(!(qsd$var.type %in% c("kriging","const"))){
			# 'Sigma' is inverted at C level because of added prediction variances
			Sigma <- covarTx(qsd,...,cvm=cvm)[[1L]]$VTX
		} else if(qsd$var.type=="const" && !inverted){
			# Only for constant Sigma, which is used as is!
			if(is.null(Sigma))
			  stop("`Sigma` cannot be NULL if used as a constant variance matrix.")			
			inverted <- TRUE			
			Sigma <- try(gsiInv(Sigma),silent=TRUE)
			if(inherits(Sigma,"try-error")) {
				msg <- paste0("Inversion of constant variance matrix failed.")
				message(msg)
				return(.qleError(message = msg,error=Sigma))
			}
		}
		
	    qlopts <- list("varType"=qsd$var.type, "useCV"=!is.null(cvm))
					   
		ret <-
		  if(length(points) >= 100 && (length(cl) > 1L || getOption("mc.cores",1L) > 1L)){
				m <- if(!is.null(cl)) length(cl) else getOption("mc.cores",1L)		
				M <- .splitList(points, m)
				names(M) <- NULL
			    unlist(
				   do.call(doInParallel,
					  c(list(X=M,
					    FUN=function(points, qsd, qlopts, X, Sigma, cvm, value.only, w) {
							.Call(C_quasiDeviance,points,qsd,qlopts,X,Sigma,cvm,value.only,w)	 
						}, cl=cl),
				     list(qsd, qlopts, X, Sigma, cvm, value.only, w))),
	             recursive = FALSE)
 							
		} else {
			.Call(C_quasiDeviance,points,qsd,qlopts,X,Sigma,cvm,value.only,w)
		}
		
		# check for NAs
		if(na.rm){
			has.na <- as.numeric(which(is.na(sapply(ret,"[",1))))	
			if(length(has.na) == length(ret)){
				stop("All quasi-deviance calculations produced `NA` values.")
			}
			if(length(has.na) > 0L){		
				message("Removing `NA` values from results of quasi-deviance calculation.")
				return( structure(ret[-has.na],  "hasNa"=has.na))
			}
		}
		return(ret)

	}, error = function(e) {
		message(.makeMessage("Calculation of quasi-deviance failed: ",conditionMessage(e)))
	 	stop(e)  # re-throw error
	})
}



#' @name mahalDist
#'
#' @title Mahalanobis distance of statistics
#'
#' @description
#'  Compute the Mahalanobis distance (MD) based on the kriging models of statistics  	 
#' 
#' @param points	  either matrix or list of points or a vector of parameters (but then considered
#' 					  as a single (multidimensional) point)
#' @param qsd		  object of class \code{\link{QLmodel}} 
#' @param Sigma		  either a constant variance matrix estimate or a prespecified value 
#' @param ...		  further arguments passed to \code{\link{covarTx}} for variance average approximation
#' @param cvm		  list of fitted cross-validation models (see \code{\link{prefitCV}})
#' @param obs         numeric vector of observed statistics (overwrites `\code{qsd$obs}` if given)
#' @param inverted    logical, \code{FALSE} (default), whether `\code{Sigma}` is already inverted when
#' 					  used as constant variance matrix only
#' @param check       logical, \code{TRUE} (default), whether to check all input arguments
#' @param w			  numeric value, scalar weight, \code{0<=w<=1}, for evaluation of sampling criterion 
#' @param value.only  integer, \code{=0} (default); if \code{=1} return only the value of the MD, if \code{=2} return the value of the sampling criterion
#' @param na.rm   	  logical, if \code{TRUE} (default) remove `NA` values from the results
#' @param cl		  cluster object, \code{NULL} (default), of class \code{MPIcluster}, \code{SOCKcluster}, \code{cluster}
#' @param verbose     if \code{TRUE}, then print intermediate output
#' 
#' @return Either a vector of MD values or a list of lists, where each contains the following elements:
#' \item{value}{ Mahalanobis distance value}
#' \item{par}{ parameter estimate}
#' \item{I}{ approximate variance matrix of the parameter estimate}
#' \item{score}{ gradient of MD (for constant `\code{Sigma}`)}
#' \item{jac}{ Jacobian of sample mean values of statistics}
#' \item{varS}{ estimated variance matrix of `\code{score}`}
#' 
#' and the following attributes:
#' 
#' \item{Sigma}{ estimate of variance matrix}
#' \item{inverted}{ whether `\code{Sigma}` was inverted} 
#'  
#' @details	The function computes the Mahalanobis distance of the given statistics \eqn{T(X)\in R^p} with different options
#'  for the approximation type of the variance matrix. The Mahalanobis distance can be used as an alternative criterion function
#'  for estimating the unknown model parameter during the main estimation function \code{\link{qle}}.
#'  
#'  There are several options how to estimate or choose the variance matrix of the statistics \eqn{\Sigma}.
#'  First, in case of a given constant variance matrix estimate `\code{Sigma}`, the Mahalanobis distance reads
#' 	\deqn{ (T(x)-E_{\theta}[T(X)])^t\Sigma^{-1}(T(x)-E_{\theta}[T(X)]) }
#'  and `\code{Sigma}` is used as given.
#' 
#'  As a second option, the variance matrix \eqn{\Sigma} can be estimated by the average approximation 
#'  \deqn{\bar{V}=\frac{1}{n}\sum_{i=1}^n V_i  }
#'  based on the simulated variance matrices \eqn{V_i=V(\theta_i)} of statistics over all sample points
#'  \eqn{\theta_1,...,\theta_n} (see vignette).
#'  Unless `\code{qsd$var.type}` equals "\code{const}", additional prediction variances are added as diagonal terms in order
#'  to account for the kriging approximation error of the statistics. A weighted version of these average approximation
#'  types is also available (see \code{\link{covarTx}}).
#'  
#'  As a continuous variance approximation we use a kriging approach (see [1]). Then \deqn{\Sigma(\theta) = Var_{\theta}(T(X))}
#'  denotes the variance matrix which depends on the parameter \eqn{\theta\in R^q} and corresponds to the
#'  formal function argument `\code{points}`. Each time a value of the criterion function is calculated at any parameter
#'  `\code{points}` the variance matrix is estimated by the corresponding approach with added prediction variances as explained above.
#'  Note that in this case the argument `\code{Sigma}` is ignored.
#' 
#' @examples
#'  data(normal)
#'  # (weighted) least squares
#'  mahalDist(c(2,1), qsd, Sigma=diag(2))
#'  
#'  # generalized LS with variance average approximation 
#' 	# here: same as quasi-deviance
#'  mahalDist(c(2,1), qsd)  
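#'  
#'  # value only (value.only=1), skipping scores and variance matrices
#'  mahalDist(c(2,1), qsd, Sigma=diag(2), value.only=1)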
#'  
#' @author M. Baaske
#' @rdname mahalDist
#' @export
mahalDist <- function(points, qsd, Sigma = NULL, ..., cvm = NULL, obs = NULL,
		               inverted = FALSE, check = TRUE, w = 0.5, value.only = FALSE, na.rm = TRUE,
					    cl = NULL, verbose = FALSE)
{
	  
	  if(check)
	   .checkArguments(qsd,Sigma=Sigma)
   	  stopifnot(is.numeric(w))
	  if(!is.list(points))
	 	 points <- .ROW2LIST(points)	  	  
	  X <- as.matrix(qsd$qldata[seq(attr(qsd$qldata,"xdim"))])  

	  # may overwrite (observed) statistics	
	  if(!is.null(obs)) {
		  obs <- unlist(obs)
		  if(anyNA(obs) || any(!is.finite(obs)))
			 warning("`NA` or `Inf` values detected in `obs`.")
		  if(!is.numeric(obs) || length(obs)!=length(qsd$covT))
			 stop("`obs` must be a (named) `numeric` vector or list of length equal to the given statistics in `qsd`.")
		  qsd$obs <- obs
	  }  
	  
	  # Priorities:
 	  #  1. kriging approx.
	  #  2. Average approx. computed by W and at theta; or by kriging
	  #  3. Sigma constant
	  tryCatch({		   
		   if(!(qsd$var.type %in% c("kriging","const"))) {			   
			   # Sigma is inverted at C level
			   Sigma <- covarTx(qsd,...,cvm=cvm)[[1L]]$VTX			   
		   } else if(qsd$var.type == "const" && !inverted){
			   if(is.null(Sigma))
				 stop("`Sigma` cannot be NULL if used as a constant variance matrix.")	
			   # Only for constant Sigma, which is used as is!
				inverted <- TRUE
				# now try to invert
				Sigma <- try(gsiInv(Sigma),silent=TRUE)
				if(inherits(Sigma,"try-error")) {
				  msg <- paste0("Inversion of constant variance matrix failed.")
				  message(msg)
				  return(.qleError(message = msg,error=Sigma))
			    }
		   }
			
		   qlopts <- list("varType"=qsd$var.type, "useCV"=!is.null(cvm)) 
		  
		   ret <-
		    if(length(points) >= 100 && (length(cl)>1L || getOption("mc.cores",1L) > 1L)){
			   m <- if(!is.null(cl)) length(cl) else getOption("mc.cores",1L)			   				
			   M <- .splitList(points, m)
			   names(M) <- NULL
			   unlist(
				   do.call(doInParallel,
						c(list(X=M,
							   FUN=function(points, qsd, qlopts, X, Sigma, cvm, value.only,w) {
								   .Call(C_mahalanobis,points,qsd,qlopts,X,Sigma,cvm,value.only,w)	 
							   }, cl=cl),
					   list(qsd, qlopts, X, Sigma, cvm, value.only,w))),
				recursive = FALSE)			  
			   
		    } else {
			  .Call(C_mahalanobis,points,qsd,qlopts,X,Sigma,cvm,value.only,w)
			}
			
			# if it is computed by W, theta
			# or the constant Sigma passed
			if(qsd$var.type=="const")			 
			 attr(Sigma,"inverted") <- inverted					  
	 		
			# check for NAs/NaNs
			if(na.rm){
				has.na <- as.numeric(which(is.na(sapply(ret,"[",1))))	
				if(length(has.na) == length(ret)){
					stop("All  Mahalanobis distance calculations produced `NA` values.")
				}
				if(length(has.na>0L)){
					warning("Removed `NA` values from results of Mahalanobis distance calculation.")
					return(	structure(ret[-has.na],	"hasNa"=has.na))
			    }				
			}
			return(ret)					
							
		}, error = function(e) {
			 message(.makeMessage("Calculation of Mahalanobis-distance failed: ",
					conditionMessage(e)))
			 stop(e)  
		}
	)
	
}

#' @name multiDimLHS
#'
#' @title Multidimensional Latin Hypercube Sampling (LHS) generation
#'
#' @description The function generates or augments a multidimensional design in a hyperbox.
#'
#' @param N		    number of points to randomly generate or by which to augment an existing sample set
#' @param lb	    lower bounds defining the (hyper)box of the parameter search space
#' @param ub 		upper bounds defining the (hyper)box of the parameter search space
#' @param method    type of sampling, `\code{randomLHS}`, `\code{maximinLHS}` or `\code{augmentLHS}` 				    
#' @param X 		optional, matrix of existing sample points, \code{NULL} (default), for augmentation only
#' @param type 		either "\code{list}" or "\code{matrix}" as return type
#'
#' @return Either a list or a matrix of the sampled vectors, or only of the newly generated points if an existing
#'  sample set was augmented.
#'
#' @rdname multiDimLHS
#' @importFrom lhs randomLHS maximinLHS augmentLHS
#'
#' @examples
#' data(normal)
#' # generate a design
#' X <- multiDimLHS(N=5,qsd$lower,qsd$upper,type="matrix")
#' 
#' # augment design X 
#' rbind(X,multiDimLHS(N=1,qsd$lower,qsd$upper,X=X,
#' 				method="augmentLHS",type="matrix"))
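#' 
#' # return the design as a list of named points
#' multiDimLHS(N=3,qsd$lower,qsd$upper,type="list")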
#' 
#' 
#' @author M. Baaske
#' @export
#' @seealso \code{\link[lhs]{randomLHS}}, \code{\link[lhs]{maximinLHS}}, \code{\link[lhs]{augmentLHS}}
multiDimLHS <- function(N, lb, ub, method = c("randomLHS","maximinLHS","augmentLHS"),
						X = NULL, type = c("list","matrix"))
{	
	if(!is.null(X) && !is.matrix(X))
	  stop("`X` must be a matrix.")
	dimX <- length(ub)
	if(dimX != length(lb))
	  stop("`lb` and `ub` bounds vector do not match.")
	
    method <- get(match.arg(method))	
	# back to [0,1]
	lhs.grid <- 
		if(!is.null(X)) {				  	
			for(i in 1L:dimX ) {
			 stopifnot(ub[i]!=lb[i])
			 X[,i] <- (X[,i]-lb[i])/(ub[i]-lb[i])		 
		 	}
			# augment X, only return newly generated points
			if( N < 1L)
			  stop("Number of points to augment must be positive (N > 0).")
			rbind(do.call(method,list("lhs"=X,"m"=N))[(nrow(X)+1L):(nrow(X)+N),])		
		} else do.call(method,list("n"=N,"k"=dimX))
	for(i in 1L:dimX )
	 lhs.grid[,i] <- lhs.grid[,i]*(ub[i]-lb[i])+lb[i]	
 	type <- match.arg(type)
 	switch(type,
		"list" = {
			colnames(lhs.grid) <- names(ub)
			return (lapply(seq_len(nrow(lhs.grid)), function(i) as.list(lhs.grid[i,])))
		},
		"matrix" = {
			colnames(lhs.grid) <- names(ub)
			lhs.grid
		}
     )
}

#' @name optStat
#' 
#' @title Optimal subset selection of statistics
#' 
#' @description The function finds a subset of at most \eqn{kmax \le p} statistics, where \code{p} is the number of available statistics
#' in the list `\code{qsd$covT}` (and at least of size equal to the length \code{q} of the parameter `\code{theta}`), which minimizes the expected
#' estimation error of the parameter when this subset is used for estimation. Based on the eigenvalue decomposition of the
#' variance-covariance matrix of the statistics, this subset is chosen among all subsets either of size at most equal to `\code{kmax}` or for
#' which all proportional contributions to each parameter component are greater than or equal to `\code{cumprop}`, whichever happens first.
#' 
#' Since both matrices depend on `\code{theta}` so does the chosen subset of statistics. However, using a list of parameters as `\code{theta}`
#' returns a list of corresponding subsets. One can then easily choose the most frequent subset among all computed ones, given either
#' a sample of parameters distributed over the whole parameter space or over an appropriately smaller region from which, e.g.,
#' the starting point is chosen or in which the true model parameter is expected to lie.
#' 
#' @param theta 	list or matrix of points at which to compute the criterion function
#' 				 	and to choose `\code{kmax}` statistics given the QL model `\code{qsd}`
#' @param qsd		object of class \code{\link{QLmodel}} 
#' @param kmax   	number of statistics to be selected (\code{q} <= \code{kmax} <= \code{p})
#' @param cumprop	numeric vector either of length one (then replicated) or equal to the length of `\code{theta}` which sets the
#' 				    proportions (0 < \code{cumprop} <= 1) of minimum overall contributions to each parameter component given the statistics 					
#' @param ...		further arguments passed to \code{\link{quasiDeviance}} or \code{\link{mahalDist}}
#' @param cl		cluster object, \code{NULL} (default), of class \code{MPIcluster}, \code{SOCKcluster}, \code{cluster}
#' @param verbose  	logical, \code{TRUE} for intermediate output
#' 
#' @return A list which consists of 
#' 	\item{id}{ indices of corresponding statistics}
#' 	\item{names}{ names of statistics (if provided)}
#'  \item{cumprop}{ cumulated proportions of contributions of selected statistics to each of the parameter components} 
#'  \item{sorted}{ list of statistics (for each parameter) sorted in decreasing order of proportional contributions to the quasi-information}
#'  \item{rankMat}{ matrix of ranks (one row per parameter component) of the proportional contributions of all statistics}
#' 
#' @rdname optStat
#' 
#' @examples
#'  data(normal)
#'  # must select all statistics and thus uses the
#'  # full information since only two statistics are available 
#'  optStat(c("mu"=2,"sigma"=1),qsd,kmax=2)[[1]]
#' 
#' @author M. Baaske
#' @export 
optStat <- function(theta, qsd, kmax = p, cumprop = 1, ..., cl = NULL, verbose=FALSE) 
{	
	p <- length(qsd$covT)
	q <- attr(qsd$qldata,"xdim")
	stopifnot(length(cumprop)>0L)
	if(kmax > p || q > kmax)	
	 stop("`kmax` must be at most equal to the number of available statistics and at least equal to the number of model parameter.")	
 	if(length(cumprop) > 1L){
		if(length(cumprop) != q)
		  stop("`cumprop` must be of length equal to number of parmater components or a scalar value.")
	 	else { stopifnot(all(cumprop<=1) && all(cumprop>0)) }
	} else cumprop <- rep(cumprop,q)
	
	# quasi-deviance
	QD <- quasiDeviance(theta,qsd,...,value.only=FALSE,cl=cl,verbose=verbose) 
	
	# evaluate statistics at theta
	ret <- doInParallel(QD,
			FUN=function(x,q,p,kmax,prop) {
				V <- attr(x,"Sigma")	  
				nms <- colnames(V)
				if(is.null(nms)){
					nms <- paste0("V",1:p)
					attr(V,"dimnames") <- list(nms,nms)
				}
				S <- try(eigen(V),silent=TRUE)
				if(inherits(S,"try-error"))
				  return(S) 
				L <- (x$jac %*% S$vectors %*% diag(1/sqrt(S$values)))^2
				# relative to total contribution
				L <- L/rowSums(L)
				colnames(L) <- nms
				# sort each row as contribution of each statistic to each parameter
				M <- lapply(1:nrow(L),function(i) sort(L[i,], decreasing=TRUE))
				# find either kmax best statistics or until cumulative proportions
				# for each parameter component are greater than prop				
			    T <- sapply(M,"[",1)
				dup <- duplicated(names(T))
				# use only non duplicated names
				if(any(dup)){
				 T[names(T[dup])[1]] <- max(T[dup]) 
				 T <- T[!dup] 
				}
				if( kmax > q && min(T) < min(prop) ) {
					stp <- FALSE
					for(i in 2:p){
						B <- sapply(M,"[",i)
						ix <- order(B,decreasing=TRUE)					
						for(k in ix){
						  if(names(B[k]) %in% names(T))
							next
						  else {
							 T <- c(T,B[k])
							 if(length(T) == kmax || 
								all(sapply(M, function(x) sum(x[names(T)])) >= prop) ){
							   stp <- TRUE
							   break;					     
						   	 }
						  }
					    }
						if(stp) break
					}		
				}
			    names(M) <- names(theta)
				id <- na.omit(pmatch(names(T),colnames(V)))
				# rank matrix
				rankM <- matrix(0,nrow=q,ncol=p)
				dimnames(rankM) <- list(names(theta),nms)
				for(i in 1:nrow(rankM))
					rankM[i,] <- pmatch(colnames(rankM),names(M[[i]]))
				
				structure(list("id"=id, "names"=names(T),
					"cumprop"=apply(L, 1, function(x) sum(x[id])),
					"sorted"=M, "rankMat"=rankM))		
			}, q=q, p=p, kmax=kmax, prop=cumprop,
			cl=cl)	
			
	return( ret )
}
