## **************************************************************************
##
## (c) 2010-2025 Guillaume Guénard
## Department de sciences biologiques,
## Université de Montréal
## Montreal, QC, Canada
##
## ** Class and method for PEM linear models **
##
## This file is part of MPSEM
##
## MPSEM is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## MPSEM is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with MPSEM. If not, see <https://www.gnu.org/licenses/>.
##
## R source code file
##
## **************************************************************************
##
#'
#' PEM Linear Model Interface
#'
#' @description An interface to estimate a linear model
#'
#' @docType class
#'
#' @name pemlm-class
#'
#' @param formula An object of class "\code{\link{formula}}" (or one that can be
#' coerced to that class): a symbolic description of the model to be fitted.
#' (see `details` in \code{\link{lm}} for further details on model
#' specification.
#' @param data An optional data frame, list or environment (or object coercible
#' by \code{\link{as.data.frame}} to a data frame) containing the variables in
#' the model. If not found in data, the variables are taken from
#' \code{environment(formula)}, typically the environment from which
#' \code{pemlm} is called.
#' @param pem A \code{\link{PEM-class}} object.
#' @param contrasts An optional list. See the \code{contrasts.arg} of
#' \code{\link{model.matrix.default}}.
#' @param x,object A \code{pemlm-class} object.
#' @param newdata An optional data frame containing auxiliary traits used for
#' making predictions.
#' @param newloc A data frame containing the graph locations of species targeted
#' for making predictions.
#' @param se.fit A \code{\link{logical}} specifying whether to return standard
#' errors (default: \code{FALSE}).
#' @param interval One of \code{"none"}, \code{"confidence"}, or
#' \code{"prediction")} specifying the type of interval associated with the
#' predictions.
#' @param level Tolerance/confidence level (default: \code{0.95}).
#' @param ... Additional arguments to be passed to the low level regression
#' fitting functions.
#'
#' @details ...
#'
#' @format A \code{pemlm-class} object contains:
#' \describe{
#' \item{auxModel}{A \code{\link{lm}}-class object.}
#' \item{resetPEM}{A function ...}
#' \item{getPEM}{A function ...}
#' \item{aic}{A function ...}
#' \item{getIncluded}{A function ...}
#' \item{getCandidate}{A function ...}
#' \item{scanCandidate}{A function ...}
#' \item{forward}{A function ...}
#' \item{promote}{A function ...}
#' \item{pemModel}{A function ...}
#' }
#'
#' @return
#' \describe{
#' \item{pemlm}{A \code{pemlm-class} object.}
#' \item{print.pemlm}{\code{NULL} (invisibly).}
#' \item{summary.pemlm}{A \code{summary.lm-class} object.}
#' \item{anova.pemlm}{An \code{anova-class} data frame.}
#' \item{predict.pemlm}{...}
#' }
#'
#' @author \packageAuthor{MPSEM}
#' Maintainer: \packageMaintainer{MPSEM}
#'
#' @references
#' Guénard, G., Legendre, P., and Peres-Neto, P. 2013. Phylogenetic eigenvector
#' maps: a framework to model and predict species traits. Methods in Ecology
#' and Evolution 4: 1120-1131
#'
#' @examples ## Example here...
#'
#' @importFrom stats anova delete.response predict is.empty.model lm.fit .getXlevels as.formula terms .checkMFClasses na.fail
#'
NULL
#'
#' @describeIn pemlm-class
#'
#' PEM Linear Model
#'
#' Calculate a PEM-based linear model for estimating trait values.
#'
#' @export
pemlm <- function(formula, data, pem, ..., contrasts = NULL) {
cl <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf, "numeric")
## if(is.matrix(y))
## stop("This function does not support multivariate regression natively.")
ny <- NROW(y)
if(ny != pem$nsp)
stop("The number of species in 'pem' (", pem$nsp,") is not equal to the ",
"number of observations (", ny,")")
if(is.empty.model(mt))
stop("The model is empty.")
mm <- model.matrix(mt, mf, contrasts)
u <- as.matrix(pem)
resetPEM <- function() {
assign("included", integer(0L), inherits=TRUE)
assign("candidate", 1L:ncol(u), inherits=TRUE)
assign("pemModel", NULL, inherits=TRUE)
invisible(NULL)
}
resetPEM()
getAux <- function(...) {
z <- lm.fit(mm, y, offset = NULL, singular.ok = TRUE, ...)
class(z) <- c(if(is.matrix(y)) "mlm","lm")
z$contrasts <- attr(mm, "contrasts")
z$xlevels <- .getXlevels(mt, mf)
z$call <- cl
z$terms <- mt
z$model <- mf
z
}
auxModel <- getAux(...)
aic <- function(s, ..., k = 2, corrected = TRUE) {
z <- lm.fit(cbind(mm, if(!missing(s)) u[,s,drop=FALSE]), y, offset = NULL,
singular.ok = TRUE, ...)
res <- z$residuals
N <- NROW(res)
M <- NCOL(res)
logl <- -0.5 * (N*M*(log(2*pi) + M - log(N*M) + log(sum(res^2))))
aic <- -2*logl + k*M*(z$rank + 1L)
if(corrected) {
k1 <- length(z$coefficients)
aic <- aic + 2*k1*(k1 + 1)/(N*M - k1 - 1)
}
aic
}
scanCandidate <- function(..., k = 2, corrected = TRUE) {
out <- numeric(length(candidate))
for(i in 1L:length(candidate))
out[i] <- aic(c(included,candidate[i]), ..., k=k, corrected=corrected)
out
}
getModel <- function(...) {
x <- cbind(mm, u[,included,drop=FALSE])
asn <- attr(mm, "assign")
attr(x, "assign") <- c(asn, rep(max(asn) + 1L, length(included)))
attr(x, "contrasts")$U <- "pem"
z <- lm.fit(x, y, offset = NULL, singular.ok = TRUE, ...)
class(z) <- c(if(is.matrix(y)) "mlm", "lm")
z$contrasts <- attr(x, "contrasts")
z$xlevels <- .getXlevels(mt, mf)
z$xlevels$U <- rownames(pem)
z$call <- cl
mtat <- attributes(mt)
str2lang(
paste(
"list(",
paste(c(as.character(mtat$variables)[-1L],"U"), collapse=","),
")",
sep=""
)
) -> mtat$variables
mtat$factors <- rbind(cbind(mtat$factors, U=0L), U=0L)
mtat$factors["U","U"] <- 1L
mtat$term.labels <- c(mtat$term.labels,"U")
mtat$order <- c(mtat$order,1L)
str2lang(
paste(
"list(",
paste(c(as.character(mtat$predvars)[-1L],"U"), collapse=","),
")",
sep=""
)
) -> mtat$predvars
mtat$dataClasses <- c(mtat$dataClasses, U="pem")
z$terms <- as.formula(paste(paste(deparse(mt), collapse=""), "U", sep=" + "))
attributes(z$terms) <- mtat
z$model <- mf
z
}
forward <- function(..., k = 2, corrected = TRUE) {
a <- aic(included, ..., k=k, corrected=corrected)
repeat{
b <- scanCandidate(..., k=k, corrected=corrected)
if(min(b) < a) {
wh <- which.min(b)
assign("included", c(included, candidate[wh]), inherits=TRUE)
assign("candidate", candidate[-wh], inherits=TRUE)
a <- b[wh]
} else
break
}
assign("pemModel", getModel(...), inherits=TRUE)
included
}
promote <- function(wh, ...) {
idx <- match(wh, candidate)
if(any(is.na(idx))) {
warning("Unknown or unavailable eigenfunction(s): ",
paste(wh[is.na(idx)],collapse=", "))
idx <- idx[!is.na(idx)]
}
if(length(idx)) {
assign("included", c(included, candidate[idx]), inherits=TRUE)
assign("candidate", candidate[-idx], inherits=TRUE)
}
assign("pemModel", getModel(...), inherits=TRUE)
included
}
structure(
list(
auxModel = auxModel,
resetPEM = resetPEM,
getPEM = function() pem,
aic = aic,
getIncluded = function() included,
getCandidate = function() candidate,
scanCandidate = scanCandidate,
forward = forward,
promote = promote,
pemModel = function() pemModel
),
class = "pemlm"
)
}
#'
#' @describeIn pemlm-class
#'
#' Print PEM Linear Model
#'
#' A print method for pemlm-class objects.
#'
#' @method print pemlm
#'
#' @export
print.pemlm <- function(x, ...) {
cat("A PEM-based linear model\n-------------------\n\n")
cat("Auxiliary linear model:\n")
print(x$auxModel)
incl <- x$getIncluded()
cat("Includes",if(!length(incl)) "no" else length(incl),"eigenfunction(s)\n")
if(length(incl)) {
cat("PEM-based linear model:\n")
print(x$pemModel())
}
invisible(NULL)
}
#'
#' @describeIn pemlm-class
#'
#' Summarize pemlm Model
#'
#' A summary method for \code{pemlm-class} objects.
#'
#' @method summary pemlm
#'
#' @export
summary.pemlm <- function(object, ...) {
out <- if(is.null(object$pemModel())) object$auxModel else object$pemModel()
summary(out, ...)
}
#'
#' @describeIn pemlm-class
#'
#' Anova of a pemlm Model
#'
#' An anova method for \code{pemlm-class} objects.
#'
#' @method anova pemlm
#'
#' @export
anova.pemlm <- function(object, ...) {
out <- if(is.null(object$pemModel())) object$auxModel else object$pemModel()
anova(out, ...)
}
#'
#' @describeIn pemlm-class
#'
#' Predict Trait Values
#'
#' A predict method for \code{pemlm-class} objects.
#'
#' @method predict pemlm
#'
#' @export
predict.pemlm <- function(object,
newdata = data.frame(row.names = rownames(newloc)),
newloc, se.fit = FALSE,
interval = c("none","confidence","prediction"),
level = 0.95, ...) {
tt <- delete.response(terms(object$auxModel))
m <- model.frame(tt, newdata, na.action=na.fail,
xlev=object$auxModel$xlevels)
if(!is.null(cl <- attr(tt, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(tt, m, contrasts.arg=object$auxModel$contrasts)
pm <- object$pemModel()
incl <- object$getIncluded()
n <- NROW(pm$residuals)
m <- NCOL(pm$residuals)
p <- pm$rank
if(p) {
p1 <- seq_len(p)
piv <- pm$qr$pivot[p1]
} else
stop("The model has no rank.")
U <- predict.PEM(object$getPEM(), newloc)
XU <- cbind(X, U[, incl, drop = FALSE])[, piv, drop = FALSE]
if(p < NCOL(XU))
stop("The fit is rank-deficient; consider removing model terms.")
interval <- match.arg(interval)
if(se.fit || interval != "none") {
## Verify if the vf is suitable, otherwise, vf <- NULL
if(!is.null(vf <- attr(U,"vf")))
if(NROW(vf) != NROW(X) || NCOL(vf) != m) {
warning("Unsuitable variance factor vector or matrix.")
vf <- NULL
}
df <- pm$df.residual
XRinv <- XU[,piv,drop=FALSE] %*% qr.solve(qr.R(pm$qr)[p1, p1])
if(m > 1L) {
res.var <- colSums(pm$residuals^2)/df
ip <- drop(XRinv^2 %*% matrix(res.var, p, m, TRUE, list(NULL, names(res.var))))
} else {
res.var <- sum(pm$residuals^2)/df
ip <- drop(XRinv^2 %*% rep(res.var, p))
}
}
if(m > 1L) {
predictor <- drop(XU %*% pm$coefficients[piv,])
if(interval != "none") {
tfrac <- qt((1 - level)/2, df)
if(interval == "confidence") {
hwid <- tfrac * sqrt(ip + if(!is.null(vf)) vf/n)
} else if(interval == "prediction") {
matrix(res.var, nrow(X), m, TRUE,
list(rownames(X), names(res.var))) -> tmp
hwid <- tfrac * sqrt(ip + tmp + if(!is.null(vf)) vf)
}
array(
c(predictor, predictor + hwid, predictor - hwid),
dim = c(NROW(predictor),NCOL(predictor),3L),
dimnames = list(
rownames(predictor),
colnames(predictor),
c("fit","lwr","upr")
)
) -> predictor
}
if(se.fit) {
se <- sqrt(ip + if(!is.null(vf)) vf/n)
list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var))
} else
predictor
} else {
predictor <- drop(XU %*% pm$coefficients[piv])
if(interval != "none") {
tfrac <- qt((1 - level)/2, df)
switch(
interval,
confidence = sqrt(ip + if(!is.null(vf)) vf/n),
prediction = sqrt(ip + res.var + if(!is.null(vf)) vf)
) * tfrac -> hwid
predictor <- cbind(predictor, predictor + hwid %o% c(1, -1))
colnames(predictor) <- c("fit", "lwr", "upr")
}
if(se.fit) {
se <- sqrt(ip + if(!is.null(vf)) vf/n)
list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var))
} else
predictor
}
}
#'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.