#' @title predict
#'
#' @description Calculates predicted values from a fitted \code{Hmsc} model.
#'
#' @param object a fitted \code{Hmsc} model object
#' @param post a list of posterior samples of the HMSC model. By default uses all samples from the pooled
#' posterior of the hM object.
#' @param XData a dataframe specifying the unpreprocessed covariates for the predictions to be made.
#' Works only if the \code{XFormula} argument was specified in the \code{Hmsc()} model constructor call.
#' Requirements are similar to those in the \code{Hmsc} model constructor.
#' @param X a matrix specifying the covariates for the predictions to be made.
#' Only one of XData and X arguments may be provided.
#' @param XRRRData a dataframe of covariates for reduced-rank regression
#' @param XRRR a matrix of covariates for reduced-rank regression
#' @param studyDesign a matrix, specifying the structure of the study design for the prediction.
#' Requirements are similar to those of the \code{Hmsc} constructor. By default this argument is
#' assigned the study design of the training data in the fitted Hmsc model.
#' @param ranLevels a list of \code{HmscRandomLevel} objects, futher specifying the structure of
#' random levels. Requirements are similar to those of the \code{Hmsc} constructor.
#' Each level must cover all units, specified in the correspondingly named column of \code{studyDesign}
#' argument. By default this argument is assigned the list of \code{HmscRandomLevel} objects
#' specified for fitting Hmsc model.
#'
#' @param Gradient an object returned by
#' \code{\link{constructGradient}}. Providing \code{Gradient} is
#' an alternative for providing \code{XData}, \code{studyDesign}
#' and \code{ranLevels}. Cannot be used together with \code{Yc}.
#'
#' @param Yc a matrix of the outcomes that are assumed to be known for
#' conditional predictions. Cannot be used together with
#' \code{Gradient} and use with caution for spatial models (see details).
#'
#' @param mcmcStep the number of extra mcmc steps used for updating the random effects
#' @param expected boolean flag indicating whether to return the location parameter of the observation
#' models or sample the values from those.
#' @param predictEtaMean boolean flag indicating whether to use the estimated mean values of posterior
#' predictive distribution for random effets corresponding for the new units.
#' @param predictEtaMeanField boolean flag indicating whether to use draws from the mean-field of the
#' posterior predictive distribution for random effets corresponding for the new units.
#'
#' @param nParallel Number of parallel processes. Parallel processing
#' is only useful with new \code{Yc} data and extra
#' \code{mcmcStep}.
#' @param useSocket (logical) Use socket clusters in parallel
#' proecessing; these are the only alternative in Windows, but in
#' other systems this should be usually set \code{FALSE} for
#' forking.
#'
#' @param \dots other arguments passed to functions.
#'
#' @details In case of conditional predictions (once non null \code{Yc} is provided)
#' \code{mcmcStep},the number of extra mcmc steps used for updating the random effects
#' for the Eta parameters, starting from the samples of the fitted Hmsc model in order to
#' account for the conditional infromation provided in the Yc argument. The higher this number is,
#' the more the obtained updated samples are unaffected by the posterior estimates of latent factors
#' in the model fitted to the training data and more resembles the true conditional posterior. However,
#' the elapsed time for conditional prediction grows approximately linearly as this parameter increases.
#' The exact number for sufficient is problem-dependent and should be assessed by e.g. gradually
#' increasing this parameter till the stationarity of the produced predictions.
#'
#' Note that the currently implemented conditional predictions behavior is well-formulated once the sampling units in
#' \code{Yc} are either the same as in the originally fitted model \code{Y}, or are considered independent of those.
#' Thus, if seeking such conditional predictions at new loacations for a spatial \code{Hmsc} model,
#' or the case once some units of \code{HmscRandomLevel} belong both to the training and predictions sets of sampling units,
#' the current implementations is not guaranteed to correctly operate in intended way.
#'
#' @return A list of length \code{length(post)}, each element of which contains a sample from the posterior
#' predictive distribution (given the sample of the Hmsc model parameters in the corresponding element of
#' the \code{post} argument)
#'
#' @seealso \code{\link{predictLatentFactor}}
#'
#'
#' @importFrom stats model.matrix rnorm pnorm rpois
#' @importFrom parallel detectCores mclapply makeCluster stopCluster
#' clusterExport clusterEvalQ parLapply
#'
#' @export
predict.Hmsc = function(object, post=poolMcmcChains(object$postList), Loff=NULL,
XData=NULL, X=NULL, XRRRData=NULL, XRRR=NULL, # this has to be updated to cov-dependent associations
studyDesign=object$studyDesign, ranLevels=object$ranLevels,
Gradient=NULL, Yc=NULL, mcmcStep=1, expected=FALSE,
predictEtaMean=FALSE, predictEtaMeanField=FALSE,
nParallel = 1,
useSocket = TRUE, ...)
{
## check valid nParallel
nParallel <- min(nParallel, detectCores())
if (nParallel > 1) {
if (.Platform$OS.type == "windows" && !useSocket) {
useSocket <- TRUE
message("setting useSocket=TRUE; the only choice in Windows")
}
}
if(!is.null(Gradient)) {
## don't know what to do if there is also Yc, and spatial models
## will trigger an error in updateEta (github issue #135)
if(!is.null(Yc)) stop("predict with arguments 'Yc' and 'Gradient' jointly is not implemented (yet)")
XData=Gradient$XDataNew
studyDesign=Gradient$studyDesignNew
ranLevels=Gradient$rLNew
}
if(!is.null(XData) && !is.null(X)){
stop("only one of XData and X arguments can be specified")
}
if(!is.null(XRRRData) && !is.null(XRRR)){
stop("only one of XRRRData and XRRR arguments can be specified")
}
if(predictEtaMean==TRUE && predictEtaMeanField==TRUE)
stop("predictEtaMean and predictEtaMeanField arguments cannot be TRUE simultaneously")
if(!is.null(XData)){
switch(class(XData)[1L],
list={
if(any(unlist(lapply(XData, is.na)))) stop("NA values are not allowed in 'XData'")
xlev = lapply(Reduce(rbind,object$XData), levels)[unlist(lapply(Reduce(rbind,object$XData), is.factor))]
X = lapply(XData, function(a) model.matrix(object$XFormula, a, xlev=xlev))
},
data.frame={
if(any(is.na(XData))) stop("NA values are not allowed in 'XData'")
xlev = lapply(object$XData, levels)[unlist(lapply(object$XData, is.factor))]
X = model.matrix(object$XFormula, XData, xlev=xlev)
}
)
} else{
if(is.null(X))
X = object$X
}
if(!is.null(XRRRData)){
xlev = lapply(object$XRRRData, levels)[unlist(lapply(object$XRRRData, is.factor))]
XRRR = model.matrix(object$XRRRFormula, XRRRData, xlev=xlev)
} else{
if(is.null(object$ncRRR)) object$ncRRR=0
if(is.null(XRRR) && object$ncRRR>0)
XRRR=object$XRRR
}
switch(class(X)[1L],
list={
nyNew = nrow(X[[1]])
},
matrix={
nyNew = nrow(X)
}
)
if(!is.null(Yc)){
if(ncol(Yc) != object$ns) stop("number of columns in Yc must be equal to ns")
if(nrow(Yc) != nyNew) stop("number of rows in Yc and X must be equal")
}
if(!is.null(Loff)){
if(ncol(Loff) != object$ns) stop("number of columns in Loff must be equal to ns")
if(nrow(Loff) != nyNew) stop("number of rows in Loff and X must be equal")
}
if(!all(object$rLNames %in% colnames(studyDesign))){
stop("dfPiNew does not contain all the necessary named columns")
}
if(!all(object$rLNames %in% names(ranLevels))){
stop("rL does not contain all the necessary named levels")
}
if(!is.null(studyDesign)){
dfPiNew = studyDesign[,object$rLNames,drop=FALSE]
} else
dfPiNew = matrix(NA,nyNew,0)
rL = ranLevels[object$rLNames]
if(!is.null(Yc)){
## object can have pre-computed data parameters, but not
## necessarily. These are needed only in updateEta(), but get it
## here anyway...
if (is.null(object$rLPar)) {
rLPar = computeDataParameters(object)$rLPar
} else {
rLPar = object$rLPar
}
} else{
rLPar = NULL
}
predN = length(post)
predPostEta = vector("list", object$nr)
PiNew = matrix(NA,nrow(dfPiNew),object$nr)
for(r in seq_len(object$nr)){
postEta = lapply(post, function(c) c$Eta[[r]])
postAlpha = lapply(post, function(c) c$Alpha[[r]])
predPostEta[[r]] = predictLatentFactor(unitsPred=levels(dfPiNew[,r]),units=levels(object$dfPi[,r]),
postEta=postEta,postAlpha=postAlpha,rL=rL[[r]],predictMean=predictEtaMean,predictMeanField=predictEtaMeanField)
rowNames = rownames(predPostEta[[r]][[1]])
PiNew[,r] = sapply(dfPiNew[,r], function(s) which(rowNames==s))
}
if(object$nr > 0){
ppEta <- simplify2array(predPostEta)
} else{
ppEta <- matrix(list(),predN,0)
}
if (nParallel == 1) { # non-Parallel
pred <- lapply(seq_len(predN), function(pN, ...){
# print(ppEta[pN,])
get1prediction(object, X, XRRR, Yc, Loff, rL, rLPar, post[[pN]],
ppEta[pN,], PiNew, dfPiNew, nyNew, expected,
mcmcStep)})
} else if (useSocket) { # socket cluster (Windows, mac, Linux)
seed <- sample.int(.Machine$integer.max, predN)
cl <- makeCluster(nParallel)
clusterExport(cl, "get1prediction", envir = environment())
clusterEvalQ(cl, {
library(Hmsc)})
pred <- parLapply(cl, seq_len(predN), function(pN, ...)
get1prediction(object, X, XRRR, Yc, Loff, rL, rLPar, post[[pN]],
ppEta[pN,], PiNew, dfPiNew, nyNew, expected,
mcmcStep, seed = seed[pN]))
stopCluster(cl)
} else { # fork (mac, Linux)
seed <- sample.int(.Machine$integer.max, predN)
pred <- mclapply(seq_len(predN), function(pN, ...)
get1prediction(object, X, XRRR, Yc, Loff, rL, rLPar, post[[pN]],
ppEta[pN,], PiNew, dfPiNew, nyNew, expected,
mcmcStep, seed = seed[pN]),
mc.cores=nParallel)
}
pred
}
## internal function to get one prediction
##
## Needs following variables or arguments that must be passsed:
## PiNew X XRRR Yc dfPiNew expected mcmcStep nyNew object pN post
## predPostEta rL rLPar
get1prediction <-
function(object, X, XRRR, Yc, Loff, rL, rLPar, sam, predPostEta, PiNew, dfPiNew,
nyNew, expected, mcmcStep, seed = NULL)
{
if (!is.null(seed))
set.seed(seed)
if(object$ncRRR>0){
XB=XRRR%*%t(sam$wRRR)
}
switch(class(X)[1L],
matrix = {
X1=X
if(object$ncRRR>0){
X1=cbind(X1,XB)
}
LFix = X1 %*% sam$Beta
},
list = {
LFix = matrix(NA, nyNew, object$ns)
for(j in 1:object$ns){
X1=X[[j]]
if(object$ncRRR>0){
X1=cbind(X1,XB)
}
LFix[,j] = X1%*%sam$Beta[,j]
}
}
)
LRan = vector("list",object$nr)
Eta = vector("list",object$nr)
for(r in seq_len(object$nr)){
Eta[[r]] = predPostEta[[r]]
if(rL[[r]]$xDim == 0){
LRan[[r]] = Eta[[r]][as.character(dfPiNew[,r]),] %*% sam$Lambda[[r]]
} else{
LRan[[r]] = matrix(0,object$ny,object$ns)
for(k in 1:rL[[r]]$xDim)
LRan[[r]] = LRan[[r]] + (Eta[[r]][as.character(dfPiNew[,r]),]*rL[[r]]$x[as.character(dfPiNew[,r]),k]) %*% sam$Lambda[[r]][,,k]
}
}
L = Reduce("+", c(list(LFix), LRan))
if(!is.null(Loff)) L = L + Loff
## predict can be slow with Yc and especially with high mcmcStep
if(!is.null(Yc) && any(!is.na(Yc))){
Z = L
Z = updateZ(Y=Yc, Z=Z, Beta=sam$Beta, iSigma=1/sam$sigma, Eta=Eta,
Lambda=sam$Lambda, Loff=Loff, X=X, Pi=PiNew, dfPi=dfPiNew,
distr=object$distr, rL=rL)
## species CV from computePredictedValues runs this innermost
## loop nfolds * nfolds.sp * predN * mcmcStep times
for(sN in seq_len(mcmcStep)){
Eta = updateEta(Y=Yc, Z=Z, Beta=sam$Beta, iSigma=1/sam$sigma,
Eta=Eta, Lambda=sam$Lambda, Alpha=sam$Alpha,
rLPar=rLPar, Loff=Loff, X=X, Pi=PiNew, dfPi=dfPiNew, rL=rL)
Z = updateZ(Y=Yc, Z=Z, Beta=sam$Beta, iSigma=1/sam$sigma, Eta=Eta,
Lambda=sam$Lambda, Loff=Loff, X=X, Pi=PiNew, dfPi=dfPiNew,
distr=object$distr, rL=rL)
}
for(r in seq_len(object$nr)){
if(rL[[r]]$xDim == 0){
LRan[[r]] = Eta[[r]][as.character(dfPiNew[,r]),] %*%
sam$Lambda[[r]]
} else{
LRan[[r]] = matrix(0,object$ny,object$ns)
for(k in 1:rL[[r]]$xDim)
LRan[[r]] = LRan[[r]] +
(Eta[[r]][as.character(dfPiNew[,r]),] *
rL[[r]]$x[as.character(dfPiNew[,r]),k]) %*%
sam$Lambda[[r]][,,k]
}
}
L = Reduce("+", c(list(LFix), LRan))
}
if(!expected){
Z = L + matrix(sqrt(sam$sigma),nrow(L),object$ns,byrow=TRUE) * matrix(rnorm(nrow(L)*object$ns),nrow(L),object$ns)
} else{
Z = L
}
for(j in 1:object$ns){
if(object$distr[j,"family"] == 2){ # probit
if(expected){
Z[,j] = pnorm(Z[,j])
} else{
Z[,j] = as.numeric(Z[,j]>0)
}
}
if(object$distr[j,"family"] == 3){ # poisson
if(expected){
Z[,j] = exp(Z[,j] + sam$sigma[j]/2)
} else{
Z[,j] = rpois(nrow(Z),exp(Z[,j]))
}
}
}
colnames(Z) = object$spNames
for(i in 1:object$ns){
m = object$YScalePar[1,i]
s = object$YScalePar[2,i]
if(m!=0 || s!=1){
Z[,i] = Z[,i]*s + m
}
}
Z
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.