# R/PLUE.R

#'	Profiled Likelihood Uncertainty Estimation
#'
#'	Performs a likelihood-based uncertainty estimation of a model. This analysis consists
#'	of a Metropolis Monte Carlo exploration of the parameter space and a subsequent profiling
#'	of the model results based on the likelihood of the input parameters.
#'
#'	A detailed description can be found in Chalom & Prado (2015).
#' @inheritParams LHS
#' @param model The function to be run, representing the model or simulation.
#' @param N The number of samples to be generated by the Metropolis algorithm.
#' @param LL
#'	  The log-likelihood function to be used by the Metropolis algorithm (the POSITIVE
#'	  log-likelihood, \emph{not} the negative log-likelihood). It must accept
#'	  an array with length equal to the number of factors.
#' @param start
#'	  The initial point to be evaluated. Must have the same length as the number of factors.
#' @param method
#'	  May be either "internal", which runs a naive and inefficient algorithm provided for testing
#'	  and didactic purposes, or "mcmc", which runs the \code{metrop} function from the \code{mcmc} package.
#' @param opts Further options to be passed to the Metropolis function. See the help on 
#' \code{\link[mcmc]{metrop}}.
#'
#' @references
#'  Chalom, A. and Prado, P.I.K.L. 2015. Uncertainty analysis and composite 
#'  hypothesis under the likelihood paradigm. \emph{arXiv}:1508.03354 [q-bio.QM]
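#'
#' @examples
#' \dontrun{
#' # A minimal illustrative sketch (not part of the original documentation): a toy
#' # model with two factors and a Gaussian log-likelihood centred on c(1, 2). All
#' # names and values below are hypothetical, chosen only to show the expected
#' # shapes of the 'model' and 'LL' arguments.
#' model <- function(dat) dat[, 1] + dat[, 2]  # one output per sampled parameter row
#' LL <- function(p) sum(dnorm(p, mean = c(1, 2), sd = 1, log = TRUE))
#' fit <- PLUE(model, factors = c("a", "b"), N = 1000, LL = LL, start = c(0, 0))
#' plot(fit)
#' }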
#' @export
PLUE <- function(model=NULL, factors, N, LL, start, res.names=NULL,
				 method=c("internal", "mcmc"), opts = list(), nboot=0, 
				 repetitions=1, cl=NULL) {
	# Input validation for common errors and "default" value handling:
	method = match.arg(method)
	my.opts = list(Q=NULL, blen=1, nspac=1, scale=1)
	my.opts[names(opts)] <- opts

	if (is.numeric(factors) && length(factors) == 1) {
		factors = paste("I", 1:factors, sep = "")
	} else if (!is.character(factors)) {
		stop("Error in function PLUE: factors should be either a single number or a character vector")
	}
	if (!is.numeric(N) || length(N) != 1) {
		stop("Error in function PLUE: N should be a single number")
	}
	if (class(LL) != "function") {
		stop("Error in function PLUE: nLL should be a function")
	}
	if (length(factors) != length(start)) {
		stop("The length of factors and start should be the same!")
	}
	if (N < 1000) {
		stop ("Error, N is to small. Use N=1000 or larger")
	}
	s <- LL(start)
	if (is.infinite(s) | is.nan(s) | is.na(s)) {
		stop ("Error, LL(start) should be a valid number.")
	}

	if (method=="internal") {
		data = MCMC.internal(LL, start, N, my.opts)
	} else { # using the mcmc package. May fail if mcmc is not installed
		outfun <- function(x) return(c(x, -LL(x))) # makes metrop return the nLL along with each data point
		temp <- mcmc::metrop(LL, start, N, blen=my.opts$blen, nspac=my.opts$nspac, scale=my.opts$scale, outfun=outfun)
		data <- list(L=as.data.frame(temp$batch[,-ncol(temp$batch)]), nLL=temp$batch[,ncol(temp$batch)])
	}
	# Apply the model to the input data
	colnames(data$L) <- factors
	res <- internal.run(cl, model, data$L, repetitions)
	prcc <- internal.prcc(data$L, res, nboot)
	if (is.null(res.names) && !all(is.na(res))) res.names <- paste("O", 1:dim(res)[2], sep="")

	# Now we profile the results to find out 
	# "what are the lower/upper limits to the 0.1-Delta nLL region?"
	# "what are the lower/upper limits to the 0.2-Delta nLL region?"
	# and so on and so on
	
	# Build a grid of nLL thresholds between the best (lowest) and worst (highest)
	# valid nLL values sampled by the chain
	mmin <- min(data$nLL[valid(data$nLL)])
	mmax <- max(data$nLL[valid(data$nLL)])
	myN <- round(N/100)
	prof <- seq(mmin, mmax, length.out=myN)
	lower = c(); upper = c();
	for (i in 1:myN)
	{
		search <- res[data$nLL <= prof[i]]
		search <- search[valid(search)] #throw away NA, NaN, Inf, etc
		lower[i] <- min(search); upper[i] <- max(search)
	}
	# finally, we subtract the minimum nLL to normalize the profile to 0
	prof = prof - mmin
	profile <- rbind(data.frame(limit = lower[myN:1], ll = prof[myN:1]), data.frame(limit=upper, ll = prof))

	X <- list(call = match.call(), N = N, model = model, factors = factors, LL = LL, nLL = data$nLL, start = start,
			  opts = opts, data = data$L, res = res, profile = profile, prcc = prcc, nboot=nboot, res.names=res.names)
	class(X) <- "PLUE"
	return(X)
}
	
#' @export
#' @rdname PLUE
print.PLUE <- function(x, ...) {
	cat("\nCall:\n", deparse(x$call), "\n", sep = "")
	cat("Model:\n"); print (x$model);
	cat("Factors:\n"); print (x$factors);
	cat("Min Likelihood at:\n"); print (unique(x$res[x$nLL == min(x$nLL)]));
}

#' @export
#' @rdname PLUE
plot.PLUE <- function(x, ...) {
	plot(x$profile, type="l", main="PLUE", xlab="Result", ylab="Delta Likelihood", ...)
	abline(h=2, lty=3)
}
### Internal use only
valid <- function(x) (!is.infinite(x)) & (!is.nan(x)) & (!is.na(x)) 
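# e.g. valid(c(1, NA, NaN, Inf)) returns c(TRUE, FALSE, FALSE, FALSE)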

# Simple Metropolis algorithm, provided to make the analyses possible without
# depending on the mcmc package
MCMC.internal <- function(LL, start, N, opts) {
	# jumping distribution
	if (is.null(opts$Q)) {
		Q <- function (x) rnorm (length(start), as.numeric(x), rep(0.02, length(start)))
	} else {
		Q <- opts$Q
	}
	current <- as.numeric(start)
	# L will hold our input data
	L = as.data.frame(matrix(nrow = N, ncol = length(start)))
	L[1,] <- current
	# nLL (soon to be a vector) will hold the -LL of each data point
	nLL = -LL(current)
	#
	#Metropolis/Monte Carlo Iteration
	#
	for (i in 2:N) {
		new <- Q(current)
		alpha <- LL(new) - LL(current)
		if (is.nan(alpha)) alpha = -Inf
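		# Metropolis rule: always accept uphill moves (alpha >= 0); accept downhill
		# moves with probability exp(alpha)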
		if (alpha >= 0 || runif(1,0,1) < exp(alpha)) { # point accepted
			L[i,] <-  new
			nLL[i] <- - LL(new)
			current <- new
		} else { # we continue on the same point
			L[i,] <- current
			nLL[i] <- nLL[i-1] # the chain stays at the current point, whose nLL is unchanged
		}
	}
	return (list(L=L, nLL=nLL))
}
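
# A minimal standalone sketch of how MCMC.internal could be called (hypothetical
# values, kept commented out so that sourcing this file has no side effects):
#   chain <- MCMC.internal(LL = function(x) sum(dnorm(x, log = TRUE)),
#                          start = c(0, 0), N = 1000,
#                          opts = list(Q = NULL, blen = 1, nspac = 1, scale = 1))
#   head(chain$L); head(chain$nLL)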
