Nothing
#' Construct a function-on-function regression term
#'
#' Defines a term \eqn{\int^{s_{hi, i}}_{s_{lo, i}} X_i(s)\beta(t,s)ds} for
#' inclusion in an \code{mgcv::gam}-formula (or \code{bam} or \code{gamm} or
#' \code{gamm4:::gamm4}) as constructed by \code{\link{pffr}}. \cr Defaults to a
#' cubic tensor product B-spline with marginal first order differences penalties
#' for \eqn{\beta(t,s)} and numerical integration over the entire range
#' \eqn{[s_{lo, i}, s_{hi, i}] = [\min(s_i), \max(s_i)]} by using Simpson
#' weights. Can't deal with any missing \eqn{X(s)}, unequal lengths of
#' \eqn{X_i(s)} not (yet?) possible. Unequal integration ranges for different
#' \eqn{X_i(s)} should work. \eqn{X_i(s)} is assumed to be numeric (duh...).
#'
#' If \code{check.ident==TRUE} and \code{basistype!="s"} (the default), the
#' routine checks conditions for non-identifiability of the effect. This occurs
#' if a) the marginal basis for the functional covariate is rank-deficient
#' (typically because the functional covariate has lower rank than the spline
#' basis along its index) and simultaneously b) the kernel of Cov\eqn{(X(s))} is
#' not disjunct from the kernel of the marginal penalty over \code{s}. In
#' practice, a) occurs quite frequently, and b) occurs usually because
#' curve-wise mean centering has removed all constant components from the
#' functional covariate. \cr If there is kernel overlap, \eqn{\beta(t,s)} is
#' constrained to be orthogonal to functions in that overlap space (e.g., if the
#' overlap contains constant functions, constraints "\eqn{\int \beta(t,s) ds =
#' 0} for all t" are enforced). See reference for details.\cr A warning is
#' always given if the effective rank of Cov\eqn{(X(s))} (defined as the number
#' of eigenvalues accounting for at least 0.995 of the total variance in
#' \eqn{X_i(s)}) is lower than 4. If \eqn{X_i(s)} is of very low rank,
#' \code{\link{ffpc}}-term may be preferable.
#'
#' @param X an n by \code{ncol(xind)} matrix of function evaluations
#' \eqn{X_i(s_{i1}),\dots, X_i(s_{iS})}; \eqn{i=1,\dots,n}.
#' @param yind \emph{DEPRECATED} used to supply matrix (or vector) of indices of
#' evaluations of \eqn{Y_i(t)}, no longer used.
#' @param xind vector of indices of evaluations of \eqn{X_i(s)},
#' i.e, \eqn{(s_{1},\dots,s_{S})}
#' @param basistype defaults to "\code{\link[mgcv]{te}}", i.e. a tensor product
#' spline to represent \eqn{\beta(t,s)}. Alternatively, use \code{"s"} for
#' bivariate basis functions (see \code{mgcv}'s \code{\link[mgcv]{s}}) or
#' \code{"t2"} for an alternative parameterization of tensor product splines
#' (see \code{mgcv}'s \code{\link[mgcv]{t2}}).
#' @param integration method used for numerical integration. Defaults to
#' \code{"simpson"}'s rule for calculating entries in \code{L}. Alternatively
#' and for non-equidistant grids, \code{"trapezoidal"} or \code{"riemann"}.
#' \code{"riemann"} integration is always used if \code{limits} is specified
#' @param L optional: an n by \code{ncol(xind)} matrix giving the weights for
#' the numerical integration over \eqn{s}.
#' @param limits defaults to NULL for integration across the entire range of
#' \eqn{X(s)}, otherwise specifies the integration limits \eqn{s_{hi}(t),
#' s_{lo}(t)}: either one of \code{"s<t"} or \code{"s<=t"} for
#' \eqn{(s_{hi}(t), s_{lo}(t)) = (t, 0]} or \eqn{[t, 0]}, respectively, or a
#' function that takes \code{s} as the first and \code{t} as the second
#' argument and returns TRUE for combinations of values \code{(s,t)} if
#' \code{s} falls into the integration range for the given \code{t}. This is
#' an experimental feature and not well tested yet; use at your own risk.
#' @param splinepars optional arguments supplied to the \code{basistype}-term.
#' Defaults to a cubic tensor product B-spline with marginal first difference
#' penalties, i.e. \code{list(bs="ps", m=list(c(2, 1), c(2,1)))}. See
#' \code{\link[mgcv]{te}} or \code{\link[mgcv]{s}} in \pkg{mgcv} for details
#' @param check.ident check identifiability of the model spec. See Details and
#' References. Defaults to \code{TRUE}.
#'
#' @seealso \code{mgcv}'s \code{\link[mgcv]{linear.functional.terms}}
#' @return A list containing \item{call}{a "call" to
#' \code{\link[mgcv]{te}} (or \code{\link[mgcv]{s}} or \code{\link[mgcv]{t2}})
#' using the appropriately constructed covariate and weight matrices}
#' \item{data}{a list containing the necessary covariate and weight matrices}
#'
#' @author Fabian Scheipl, Sonja Greven
#' @references For background on \code{check.ident}:\cr Scheipl, F., Greven,
#' S. (2016). Identifiability in penalized function-on-function regression
#' models. Electronic Journal of Statistics, 10(1), 495--526.
#' \url{https://projecteuclid.org/journals/electronic-journal-of-statistics/volume-10/issue-1/Identifiability-in-penalized-function-on-function-regression-models/10.1214/16-EJS1123.full}
#' @export
#' @importFrom MASS Null
# FIXME: weights for Simpson's rule on non-equidistant grids
# TODO: allow X to be of class fd (?)
# TODO: allow X to be a factor -- would result in one beta(s,t) surface for each level? (?)
# TODO: by variables
# TODO: add FAME penalty?
ff <- function(X,
yind = NULL,
xind=seq(0, 1, l=ncol(X)),
basistype= c("te", "t2", "ti", "s", "tes"),
integration=c("simpson", "trapezoidal", "riemann"),
L=NULL,
limits=NULL,
splinepars=if(basistype != "s") {
list(bs="ps", m=list(c(2, 1), c(2,1)), k=c(5, 5))
} else {
list(bs="tp", m=NA)
},
check.ident=TRUE
){
n <- nrow(X)
nxgrid <- ncol(X)
stopifnot(all(!is.na(X)))
# check & format index for X
if(is.null(dim(xind))){
xind <- t(as.matrix(xind))
}
stopifnot(ncol(xind) == nxgrid)
if(nrow(xind)== 1){
xind <- matrix(as.vector(xind), nrow=n, ncol=nxgrid, byrow=T)
} else {
stop("<xind> has to be supplied as a vector or matrix with a single row.")
}
stopifnot(nrow(xind) == n)
stopifnot(all.equal(order(xind[1,]), 1:nxgrid))
basistype <- match.arg(basistype)
integration <- match.arg(integration)
# scale xind to [0, 1] and check for reasonably equidistant gridpoints
xind.sc <- xind - min(xind)
xind.sc <- xind.sc/max(xind.sc)
diffXind <- t(round(apply(xind.sc, 1, diff), 3))
if(is.null(L) & any(apply(diffXind, 1, function(x) length(unique(x))) != 1) &&
# gridpoints for any X_i(s) not equidistant?
integration=="simpson"){
message("Non-equidistant grid detected for ", deparse(substitute(X)),
".\n Changing to trapezoidal rule for integration.")
integration <- "trapezoidal"
}
if(!is.null(limits) && integration != "riemann"){
integration <- "riemann"
message("<limits>-argument detected. ",
"Changing to Riemann sums for numerical integration.")
}
# FIXME: figure out weights for simpson's rule on non-equidistant grids instead of all this...
#make weight matrix for by-term
if(!is.null(L)){
stopifnot(nrow(L) == n, ncol(L) == nxgrid)
#TODO: check whether supplied L is compatibel with limits argument
} else {
L <- switch(integration,
"simpson" = {
# \int^b_a f(t) dt = (b-a)/gridlength/3 * [f(a) + 4*f(t_1) + 2*f(t_2) + 4*f(t_3) + 2*f(t_3) +...+ f(b)]
((xind[,nxgrid]-xind[,1])/nxgrid)/3 *
matrix(c(1, rep(c(4, 2), length=nxgrid-2), 1), nrow=n, ncol=nxgrid, byrow=T)
},
"trapezoidal" = {
# \int^b_a f(t) dt = .5* sum_i (t_i - t_{i-1}) f(t_i) + f(t_{i-1}) =
# (t_2 - t_1)/2 * f(a=t_1) + sum^{nx-1}_{i=2} ((t_i - t_i-1)/2 + (t_i+1 - t_i)/2) * f(t_i) + ... +
# + (t_nx - t_{nx-1})/2 * f(b=t_n)
diffs <- t(apply(xind, 1, diff))
.5 * cbind(diffs[,1],
t(apply(diffs, 1, filter, filter=c(1,1)))[,-(nxgrid-1)],
diffs[,(nxgrid-1)])
},
"riemann" = {
# simple quadrature rule:
# \int^b_a f(t) dt = sum_i (t_i-t_{i-1})*(f(t_i))
diffs <- t(apply(xind, 1, diff))
#assume delta(t_0=a, t_1) = avg. delta
cbind(rep(mean(diffs),n), diffs)
}
)
}
LX <- L*X
if(!is.null(limits)){
if(!is.function(limits)){
if(!(limits %in% c("s<t","s<=t"))){
stop("supplied <limits> argument unknown")
}
if(limits=="s<t"){
limits <- function(s, t){
s < t
}
} else {
if(limits=="s<=t"){
limits <- function(s, t){
(s < t) | (s == t)
}
}
}
}
}
# assign unique names based on the given args
xindname <- paste(deparse(substitute(X)), ".smat", sep="")
yindname <- paste(deparse(substitute(X)), ".tmat", sep="")
LXname <- paste("L.", deparse(substitute(X)), sep="")
# make call
splinefun <- as.symbol(basistype) # if(basistype=="te") quote(te) else quote(s)
frmls <- if(exists(basistype, asNamespace("mgcv"), inherits = FALSE)) {
formals(getFromNamespace(basistype, ns="mgcv"))
} else {
formals(basistype)
}
frmls <- modifyList(frmls[names(frmls) %in% names(splinepars)], splinepars)
call <- as.call(c(
list(splinefun,
x = as.symbol(substitute(xindname)),
z = as.symbol(substitute(yindname)),
by =as.symbol(substitute(LXname))),
frmls))
if(check.ident){
## check whether (number of basis functions) < (number of relevant eigenfunctions of X)
evX <- svd(X, nu = 0, nv = 0)$d^2
maxK <- max(1, min(which((cumsum( evX)/sum(evX)) >= .995)))
bsdim <- eval(call)$margin[[1]]$bs.dim
if(maxK <= 4)
warning("Very low effective rank of <", deparse(match.call()$X),
"> detected. ", maxK,
" largest eigenvalues of its covariance alone account for >99.5% of ",
"variability. <ffpc> might be a better choice here.")
if(maxK < bsdim){
warning("<k> larger than effective rank of <",deparse(match.call()$X),
">. Model identifiable only through penalty.")
}
if(basistype!="s"){
# check whether span(Null(X)), span(L * B_s%*%Null(penalty)) are disjunct:
# set up marginal spline basis:
smConstr <- get(paste0("smooth.construct.",
attr(eval(call)$margin[[1]], "class")))
basisdata <- list(sort(unique(xind)))
names(basisdata) <- xindname
basis <- smConstr(object=list(term=xindname,
bs.dim=ifelse(!is.null(call$k[1]), call$k[1], -1),
fixed=FALSE, dim=1,
p.order=if(!is.null(call$m)) call$m[[1]] else NA,
by=NA),
data=basisdata, knots=list())
# get condition number of marginal design matrix
evDs <- svd(LX %*% basis$X, nu = 0, nv = 0)$d^2
logCondDs <- log10(max(evDs)) - log10(min(evDs))
N.X <- Null(t(X))
## for the artificial examples in the paper below produces surprising
## results if integration != "riemann":
## -- usually more constraints than expected, e.g. constraints on "linearish"
## even though nullspace of X only contains constants by construction --
## unless diag(L[1,]) is rm'ed from N.pen: non-constant integration wts
## seem to implicate higher order eigenfunctions in the kernel as well, e.g.
## sv's of N.pen have high frequency oscillations, etc (...waves hands...)
N.pen <- diag(L[1,]) %*% basis$X %*% Null(basis$S[[1]])
if(any(c(NCOL(N.X) == 0, NCOL(N.pen) == 0))) {
nullOverlap <- 0
} else {
nullOverlap <- trace_lv(svd(N.X)$u, svd(N.pen)$u)
}
if(nullOverlap > 0.95 & logCondDs > 6){
warning("Found badly conditioned design matrix for the functional effect",
" and kernel overlap for <", deparse(match.call()$X),
"> and the specified basis and penalty. ",
"Enforcing constraint to force function components in this overlap to 0 ",
"since coefficient surface is not identifiable in that function space.",
"See Scheipl/Greven (2016) for details & alternatives.")
C_overlap <- {
tmp <- svd(qr.fitted(qr(N.X), N.pen))
t(tmp$u[, which(tmp$d > max(tmp$d)*.Machine$double.eps^.66), drop=FALSE]) %*%
basis$X
}
if(is.null(call$xt)) {
call$xt <- list(C1 = C_overlap)
} else {
call$xt <- c(call$xt, C1 = C_overlap)
}
call$bs <- c("ps_c",
sub(".smooth.spec", "", attr(eval(call)$margin[[2]], "class"), fixed=TRUE))
call[[1]] <- as.symbol("ti")
call$mc <- c(TRUE, FALSE)
} else {
}
}
}
return(list(call=call, xind=xind[1,], LX=LX, L=L,
xindname=xindname, yindname=yindname,
LXname=LXname, limits=limits))
}#end ff()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.