Nothing
#' @title Predict Method for gllvm Fits
#' @description Obtains predictions from a fitted generalized linear latent variable model object.
#'
#' @param object an object of class 'gllvm'.
#' @param type the type of prediction required. The default (\code{"link"}) is on the scale of the linear predictors; the alternative \code{"response"} is on the scale of the response variable. that is, the predictions for the binomial model are predicted probabilities. In case of ordinal data, \code{type = "response"} gives predicted probabilities for each level of ordinal variable.
#' @param newX A new data frame of environmental variables. If omitted, the original matrix of environmental variables is used.
#' @param newTR A new data frame of traits for each response taxon. If omitted, the original matrix of traits is used.
#' @param newLV A new matrix of latent variables. If omitted, the original matrix of latent variables is used. Note that number of rows/sites must be the same for \code{newX} (if X covariates are included in the model).
#' @param level specification for how to predict. Level one (\code{level = 1}) attempts to use the predicted site scores from variational approximations or laplace approximation or given site scores in \code{newLV}. Level 0 sets the latent variable to zero. Defaults to 1.
#' @param offset specification whether of not offset values are included to the predictions in case they are in the model, defaults to \code{TRUE} when offset values that are used to fit the model are included to the predictions. Alternatives are matrix/vector (number of rows must match with the \code{newX}) of new offset values or \code{FALSE}, when offsets are ignored.
#' @param ... not used.
#'
#' @details
#' If \code{newX}, \code{newTR} and \code{newLV} are omitted the predictions are based on the data used for fitting the model. Notice that \code{newTR} need to match with the number of species in the original data.
#' Instead, new sites can be specified in \code{newX}. If predictors \code{newX} (and \code{newTR}) are given, and \code{newLV} is not, latent variables are not used in the predictions.
#'
#' @return A matrix containing requested predictor types.
#' @author Jenni Niku <jenni.m.e.niku@@jyu.fi>, David Warton
#'
#' @examples
#' \donttest{
#'# Load a dataset from the mvabund package
#'data(antTraits)
#'y <- as.matrix(antTraits$abund)
#'X <- scale(antTraits$env[, 1:3])
#'# Fit gllvm model
#'fit <- gllvm(y = y, X, family = poisson())
#'# fitted values
#'predfit <- predict(fit, type = "response")
#'
#'# linear predictors
#'predlin <- predict(fit)
#'# Predict new sites:
#'# Generate matrix of environmental variables for 10 new sites
#'xnew <- cbind(rnorm(10), rnorm(10), rnorm(10))
#'colnames(xnew) <- colnames(X)
#'predfit <- predict(fit, newX = xnew, type = "response", level = 0)
#'
#'TR <- (antTraits$tr[, 1:3])
#'fitt <- gllvm(y = y, X, TR, family = poisson())
#'# linear predictors
#'predlin <- predict(fitt)
#'# Predict new sites:
#'# Generate matrix of environmental variables for 10 new sites
#'xnew <- cbind(rnorm(10), rnorm(10), rnorm(10))
#'colnames(xnew) <- colnames(X)
#'# Generate matrix of traits for species
#'trnew <- data.frame(Femur.length = rnorm(41), No.spines = rnorm(41),
#' Pilosity = factor(sample(0:3, 41, replace = TRUE)))
#'predfit <- predict(fitt, newX = xnew, newTR = trnew, type = "response", level = 0)
#'}
#'@aliases predict predict.gllvm
#'@method predict gllvm
#'
#'@export
#'@export predict.gllvm
predict.gllvm <- function(object, newX = NULL, newTR = NULL, newLV = NULL, type ="link", level = 1, offset = TRUE, ...){
r0 <- NULL
newdata <- newX
p <- ncol(object$y)
n <- max(nrow(object$y), nrow(newdata), nrow(newLV))
if (!is.null(newdata))
n <- nrow(newdata)
if (is.null(newdata) && !is.null(object$X) && !is.null(newLV)) {
if (nrow(newLV) != nrow(object$y)) {
stop("Number of rows in newLV must equal to the number of rows in the response matrix, if environmental variables are included in the model and newX is not included.")
}
}
if (is.null(newdata) & !is.null(newLV)) {
n <- nrow(newLV)
if(!is.null(object$X) || !is.null(object$lv.X)){
if((nrow(object$y)!=n))
stop("Number of rows in 'newLV' must be the same as the number of rows in the data used for model fitting, if new X values for the corresponding units (in 'newLV') are not given.")
}
}
if (!is.null(object$X)) {
formula <- formula(terms(object))
}
else {
formula <- NULL
}
if (object$row.eff != FALSE) {
if(!is.null(newX) & length(all.vars(object$call$row.eff))>0){
if(all.vars(object$call$row.eff) %in% colnames(newX)) {
warning("Using row effects for predicting new sites does not work yet.")
}
} else if ((length(object$params$row.params) != nrow(object$y)) & is.null(newX))
object$params$row.params = c(object$TMBfn$env$data$dr0 %*%
object$params$row.params)
}
b0 <- object$params$beta0
eta <- matrix(b0, n, p, byrow = TRUE)
if (!is.null(newTR))
if (nrow(newTR) != p)
stop("Number of rows in newTR must match to the number of responses in the original data matrix.")
if (is.null(colnames(object$y))) {
colnames(object$y) <- paste("y", 1:p, sep = "")
}
if (!is.null(object$X) && is.null(object$TR)) {
B <- object$params$Xcoef
if (is.null(newdata)) {
X.d <- Xnew <- object$X
}
else {
X.d <- Xnew <- newdata
}
if (!is.null(newdata)) {
if (is.null(object$call$formula)) {
n1 <- colnames(newdata)
formula1 <- paste("~ ", n1[1], sep = "")
if (length(n1) > 1) {
for (i1 in 2:length(n1)) {
formula1 <- paste(formula1, n1[i1], sep = "+")
}
}
formula1 <- paste(formula1, "1", sep = "-")
formula1 <- formula(formula1)
Xnew <- as.matrix(model.matrix(formula1, data = data.frame(newdata)))
}
formula <- formula(object$formula)
xb <- as.matrix(model.matrix(formula, data = data.frame(Xnew)))
X.d <- as.matrix(xb[, !(colnames(xb) %in% c("(Intercept)")),
drop = F])
colnames(X.d) <- colnames(xb)[!(colnames(xb) %in%
c("(Intercept)"))]
}
else {
X.d <- object$X.design
}
eta <- eta + X.d %*% t(B)
}
if (!is.null(object$X) && !is.null(object$TR)) {
B <- object$params$B
X.d <- object$X.design
if (!is.null(newdata) && !is.null(newTR)) {
Xnew <- newdata
TRnew <- newTR
y1 <- object$y[sample(1:nrow(object$y), nrow(Xnew),
replace = TRUE), ]
yX <- reshape(data.frame(cbind(y1, Xnew)), direction = "long",
varying = colnames(data.frame(y1)), v.names = "y",
timevar = "species")
TR2 <- data.frame(species = 1:p, TRnew)
yXT <- merge(yX, TR2, by = "species")
X.d <- as.matrix(model.matrix(formula, data = yXT))
}
if (!is.null(newdata) && is.null(newTR)) {
formula <- formula(object$formula)
n1 <- colnames(newdata)
formula1 <- paste("~", n1[1], sep = "")
if (length(n1) > 1) {
for (i1 in 2:length(n1)) {
formula1 <- paste(formula1, n1[i1], sep = "+")
}
}
formula1 <- paste(formula1, "1", sep = "-")
formula1 <- formula(formula1)
Xnew <- as.matrix(model.matrix(formula1, data = data.frame(newdata)))
TRnew <- object$TR
y1 <- object$y[sample(1:nrow(object$y), nrow(Xnew),
replace = TRUE), ]
yX <- reshape(data.frame(cbind(y1, Xnew)), direction = "long",
varying = colnames(data.frame(y1)), v.names = "y",
timevar = "species")
TR2 <- data.frame(species = 1:p, TRnew)
yXT <- merge(yX, TR2, by = "species")
X.d <- as.matrix(model.matrix(formula, data = yXT))
}
if (is.null(newdata) && !is.null(newTR)) {
if (nrow(newTR) != p)
stop("Number of rows in TRnew must equal to the number of response variables.")
formula <- formula(object$formula)
TRnew <- newTR
n1 <- colnames(newTR)
formula1 <- paste("~", n1[1], sep = "")
if (length(n1) > 1) {
for (i1 in 2:length(n1)) {
formula1 <- paste(formula1, n1[i1], sep = "+")
}
}
formula1 <- paste(formula1, "1", sep = "-")
formula1 <- formula(formula1)
TRnew <- as.matrix(model.matrix(formula1, data = data.frame(newTR)))
Xnew <- object$X
y1 <- object$y[sample(1:nrow(object$y), nrow(Xnew),
replace = TRUE), ]
yX <- reshape(data.frame(cbind(y1, Xnew)), direction = "long",
varying = colnames(data.frame(y1)), v.names = "y",
timevar = "species")
TR2 <- data.frame(species = 1:p, TRnew)
yXT <- merge(yX, TR2, by = "species")
X.d <- as.matrix(model.matrix(formula, data = yXT))
}
X.d <- as.matrix(X.d[, colnames(X.d) != "(Intercept)"])
eta <- eta + matrix(X.d %*% B, n, p)
if (!is.null(object$randomX)) {
if (!is.null(newdata)) {
tr <- try(X.xr <- as.matrix(model.matrix(object$randomX,
data = yXT)), silent = TRUE)
if (inherits(tr, "try-error") | (object$randomX=="~.")) {
X.xr <- as.matrix(yXT[, colnames(object$Xrandom)])
colnames(X.xr) <- colnames(object$Xrandom)
}
rnam <- colnames(X.xr)[!(colnames(X.xr) %in%
c("(Intercept)"))]
xr <- as.matrix(X.xr[1:n, !(colnames(X.xr) %in%
c("(Intercept)"))])
if (NCOL(xr) == 1)
colnames(xr) <- rnam
} else {
xr <- object$Xrandom
}
eta <- eta + matrix(xr %*% object$params$Br, n, p)
}
}
if (level == 1) {
if (is.null(newLV) && !is.null(newdata) & (object$num.lv +
object$num.lv.c) > 0) {
if (nrow(newdata) != nrow(object$y)) {
stop("Level 1 predictions cannot be calculated for new X values if new latent variable values are not given. Change to 'level = 0' predictions.")
}
}
if (object$num.lv > 0 | (object$num.lv.c + object$num.RR) >
0) {
if(!is.null(object$lvs)){
if(nrow(object$lvs)!=n) object$lvs = object$TMBfn$env$data$dLV%*%object$lvs # !!!
}
if (!is.null(newLV)) {
if (ncol(newLV) != (object$num.lv + object$num.lv.c))
stop("Number of latent variables in input doesn't equal to the number of latent variables in the model.")
if (!is.null(newdata)) {
if (nrow(newLV) != nrow(newdata))
stop("Number of rows in newLV must equal to the number of rows in newX, if newX is included, otherwise same as number of rows in the response matrix.")
}
if (object$num.RR == 0) {
lvs <- t(t(newLV) * object$params$sigma.lv)
} else {
if (object$num.lv.c > 0) {
lvs <- cbind(t(t(newLV[, 1:object$num.lv.c]) *
object$params$sigma.lv[1:object$num.lv.c]),
matrix(0, ncol = object$num.RR, nrow = n),
t(t(newLV[, -c(1:object$num.lv.c)]) *
object$params$sigma.lv[1:object$num.lv]))
}
else if (object$num.lv > 0 & object$num.lv.c == 0) {
lvs <- cbind(matrix(0, ncol = object$num.RR,
nrow = n), t(t(newLV) * object$params$sigma.lv))
} else {
lvs <- matrix(0, ncol = object$num.RR, nrow = n)
}
}
} else {
if (object$num.RR == 0) {
lvs <- t(t(object$lvs) * object$params$sigma.lv)
} else {
if (object$num.lv.c > 0) {
lvs <- cbind(t(t(object$lvs[, 1:object$num.lv.c]) *
object$params$sigma.lv[1:object$num.lv.c]),
matrix(0, ncol = object$num.RR, nrow = n),
t(t(object$lvs[, -c(1:object$num.lv.c)]) *
object$params$sigma.lv[1:object$num.lv]))
}
else if (object$num.lv > 0 & object$num.lv.c ==
0) {
lvs <- cbind(matrix(0, ncol = object$num.RR,
nrow = n), t(t(object$lvs) * object$params$sigma.lv))
}
else {
lvs <- matrix(0, ncol = object$num.RR, nrow = n)
}
}
}
if ((object$num.lv.c + object$num.RR) > 0 & !is.null(newdata)) {
lv.X <- model.matrix(object$lv.formula, as.data.frame(newdata))[,-1, drop = F]
} else {
lv.X <- object$lv.X
}
theta <- (object$params$theta[, 1:(object$num.lv +
(object$num.lv.c + object$num.RR)), drop = F])
eta <- eta + lvs %*% t(theta)
if ((object$num.lv.c + object$num.RR) > 0) {
eta <- eta + lv.X %*% object$params$LvXcoef %*%
t((theta[, 1:(object$num.lv.c + object$num.RR), drop = F]))
}
if (object$quadratic != FALSE) {
if(object$num.lv>0){
theta2 <- (object$params$theta[, -c(1:(object$num.lv.c + object$num.RR+object$num.lv)), drop = F])
theta2 <- (theta2[, (object$num.lv.c + object$num.RR+1):ncol(theta2), drop = F])
eta <- eta + (lvs[,(ncol(lvs)-object$num.lv+1):ncol(lvs), drop = F])^2 %*% t(theta2)
}
if ((object$num.lv.c + object$num.RR) > 0) {
theta2 <- object$params$theta[,-c(1:(object$num.lv+object$num.lv.c+object$num.RR))]
theta2C <- abs(theta2[, 1:(object$num.lv.c +
object$num.RR), drop = F])
lvs <- lvs[,1:(object$num.lv.c+object$num.RR)] + lv.X%*%object$params$LvXcoef
for (j in 1:p) {
eta[, j] <- eta[, j] - lvs^2%*%theta2C[j, ]
}
}
}
}
}
if ((object$row.eff %in% c("random", "fixed", "TRUE")) && is.null(r0) & is.null(newX)) {
if(!is.null(object$params$row.params)){
if(length(object$params$row.params)!=n) object$params$row.params = c(object$TMBfn$env$data$dr0%*%object$params$row.params) # !!!
}
if(nrow(eta) == length(object$params$row.params)){
r0 <- object$params$row.params
if((object$row.eff %in% "random") && (level==0)) r0 = r0*0
eta <- eta + r0
}
}
if(!is.null(object$offset)){
if(offset!=FALSE){
if(is.matrix(offset)){
if((NROW(offset) == NROW(eta))){
eta <- eta+object$offset
} else {stop(paste("Incorrect dimension for the 'offset', number of rows should now be ", NROW(eta)))}
} else if((NROW(object$offset) == NROW(eta))){
eta <- eta+object$offset
} else {warning(paste("Could not include offset values as 'object$offset' has incorrect dimension, set 'offset = FALSE' or include new offset values"))}
}
}
if(object$family %in% c("poisson", "negative.binomial", "tweedie", "gamma", "exponential"))
ilinkfun <- exp
if (object$family == "binomial" || (object$family == "beta") || (object$family == "betaH") || (object$family == "orderedBeta"))
ilinkfun <- binomial(link = object$link)$linkinv
if (object$family == "ordinal")
ilinkfun <- pnorm
if (object$family %in% c("ZIP","ZINB"))
ilinkfun <- function(eta) exp(eta) * (1 - matrix(object$params$phi,
n, p, byrow = TRUE))
if (object$family == "gaussian")
ilinkfun <- gaussian()$linkinv
out <- NULL
preds <- NULL
if ("link" %in% type)
out <- eta
if ("response" %in% type)
out <- ilinkfun(eta)
if (is.null(newdata) && is.null(newTR) && is.null(newLV) &&
"logL" %in% type)
out <- object$logL
if (object$family == "ordinal" && type == "response") {
if (object$zeta.struc == "species") {
k.max <- apply(object$params$zeta, 1, function(x) length(x[!is.na(x)])) +
1
preds <- array(NA, dim = c(max(k.max), nrow(eta),
p), dimnames = list(paste("level", 1:max(k.max),
sep = ""), NULL, NULL))
for (i in 1:n) {
for (j in 1:p) {
probK <- NULL
probK[1] <- pnorm(object$params$zeta[j, 1] -
eta[i, j], log.p = FALSE)
probK[k.max[j]] <- 1 - pnorm(object$params$zeta[j,
k.max[j] - 1] - eta[i, j])
if (k.max[j] > 2) {
j.levels <- 2:(k.max[j] - 1)
for (k in j.levels) {
probK[k] <- pnorm(object$params$zeta[j,
k] - eta[i, j]) - pnorm(object$params$zeta[j,
k - 1] - eta[i, j])
}
}
preds[, i, j] <- c(probK, rep(NA, max(k.max) -
k.max[j]))
}
}
out <- preds
}
else {
k.max <- length(object$params$zeta) + 1
preds <- array(NA, dim = c(k.max, nrow(eta), p),
dimnames = list(paste("level", 1:max(k.max),
sep = ""), NULL, NULL))
for (i in 1:n) {
for (j in 1:p) {
probK <- NULL
probK[1] <- pnorm(object$params$zeta[1] - eta[i,
j], log.p = FALSE)
probK[k.max] <- 1 - pnorm(object$params$zeta[k.max -
1] - eta[i, j])
levels <- 2:(k.max - 1)
for (k in levels) {
probK[k] <- pnorm(object$params$zeta[k] -
eta[i, j]) - pnorm(object$params$zeta[k -
1] - eta[i, j])
}
preds[, i, j] <- c(probK)
}
}
out <- preds
}
dimnames(preds)[[3]] <- colnames(object$y)
}
try(rownames(out) <- 1:NROW(out), silent = TRUE)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.