R/pi_basis.R

Defines functions Predict.matrix.pi.smooth smooth.construct.pi.smooth.spec

Documented in Predict.matrix.pi.smooth smooth.construct.pi.smooth.spec

#' Parametric Interaction basis constructor
#' 
#' The \code{pi} basis is appropriate for smooths of multiple variables. Its 
#' purpose is to parameterize the way in which the basis changes with one of
#' those variables. For example, suppose the smooth is over three variables,
#' \eqn{x}, \eqn{y}, and \eqn{t}, and we want to parameterize the effect of
#' \eqn{t}. Then the \code{pi} basis will assume \eqn{f(x,y,t) = \sum_k
#' g_k(t)*f_k(x,y)}, where the \eqn{g_k(t)} functions are pre-specified and the
#' \eqn{f_k(x,y)} functions are estimated using a bivariate basis. An example of
#' a parametric interaction is a linear interaction, which would take the form 
#' \eqn{f(x,y,t) = f_1(x,y) + t*f_2(x,y)}.
#' 
#' All functions \eqn{f_k()} are defined using the same basis set.
#' Accordingly, they are penalized using a single block-diagonal penalty matrix
#' and one smoothing parameter. Future versions of this function may be able
#' to relax this assumption.
#' 
#' @param object a smooth specification object, generated by, e.g.,
#'    \code{s(x, y, t, bs="pi", xt=list(g=list(g1, g2, g3)))}. For
#'    transformation functions \code{g1}, \code{g2}, and \code{g3}, see
#'    Details below.
#' @param data a list containing the variables of the smooth (\code{x},
#'    \code{y}, and \code{t} above), as well as any \code{by} variable.
#    The first variable will
#    be the index of the \eqn{f_k} functions (\code{x} above), and the second 
#    will be the interacting variable (\code{t} above). The \code{by} variable
#    is the last element.
#' @param knots a list containing any knots supplied for basis setup - in same
#'    order and with same names as \code{data}. Can be \code{NULL}.
#' 
#' @details
#' \code{object} should be defined (using \code{s()}) with an \code{xt}
#' argument. This argument is a list that could contain any of the following
#' elements:
#' \enumerate{
#'   \item \code{g}: the functions \eqn{g_k(t)}, specified as described below.
#'   \item \code{bs}: the basis code used for the functions \eqn{f_k()}; defaults
#'     to thin-plate regression splines, which is mgcv's default. The same
#'     basis will be used for all \eqn{k}.
#'   \item \code{idx}: an integer index indicating which variable from
#'     \code{object$term} is to be parameterized, i.e., the \eqn{t} variable;
#'     defaults to \code{length(object$term)}
#'   \item \code{mp}: flag to indicate whether multiple penalties should
#'     be estimated, one for each \eqn{f_k()}. Defaults to \code{TRUE}. If
#'     \code{FALSE}, the penalties for each \eqn{k} are combined into a single
#'     block-diagonal penalty matrix (with one smoothing parameter).
#'   \item \code{...}: further \code{xt} options to be passed onto the basis for
#'     \eqn{f_k()}.
#' }
#' \code{xt$g} can be entered in one of the following forms:
#' \enumerate{
#'   \item a list of functions of length \eqn{k}, where each function is of
#'   one argument (assumed to be \eqn{t})
#'   \item one of the following recognized character strings: code{"linear"},
#'   indicating a linear interaction, i.e. \eqn{f(x,t) = f_1(x)+t*f_2(x)};
#'   \code{"quadratic"}, indicating a quadratic interaction, i.e.
#'   \eqn{f(x,t) = f_1(x)+t*f_2(x) + t^2*f_3(x)}; or
#'   \code{"none"}, indicating no interaction with \eqn{t}, i.e.
#'   \eqn{f(x,t)=f_1(x)}.
#' }
#' The only one of the above elements that is required is \code{xt}.
#' If default values for \code{bs}, \code{idx}, and \code{mp} are desired,
#' \code{xt} may also be entered as the \code{g} element itself; i.e.
#' \code{xt=g}, where \code{g} is either the list of functions or an acceptable
#' character string.
#' 
#' Additional arguments for the lower-dimensional basis over \code{f_k} may
#' be entered using the corresponding arguments of \code{s()}, e.g.
#' \code{k}, \code{m}, \code{sp}, etc. For example,
#' \code{s(x, t, bs="pi", k=15, xt=list(g="linear", bs="ps"))}
#' will define a linear interaction with \code{t} of a univariate p-spline
#' basis of dimension 15 over \code{x}.
#' 
#' @author Fabian Scheipl and Jonathan Gellar
#' @export
#' @importFrom mgcv Predict.matrix
#' @importFrom mgcv smooth.construct
#' @return An object of class "pi.smooth". See
#'    \code{\link[mgcv]{smooth.construct}} for the elements it will contain.
#' 

smooth.construct.pi.smooth.spec <- function(object, data, knots) {
  # Constructor method for parametric bivariate basis
  
  # Input Checks
  if (length(object$term) < 2)
    stop("\"pi\" smooth must have at least two variables")
  
  xt <- object$xt
  bs = newxt <- NULL
  mp <- FALSE
  idx <- length(object$term)
  if (is.null(xt)) {
    warning("No interaction functions are entered (using xt$g), defaulting to \"none\".")
    xt <- "none"
  }
  
  # Extract g functions, basis, and index from xt
  if (is.character(xt)) {
    g <- xt
  } else if (is.list(xt)) {
    if (is.null(xt$g)) {
      # xt is the g list itself
      g <- xt
    } else {
      if (!all(names(xt) %in% c("g", "bs", "xt", "idx", "mp")))
        warning("Ignoring unrecognized elements of \"xt\"")
      g  <- xt$g
      bs <- xt$bs
      newxt <- xt$xt
      if (!is.null(xt$idx)) idx <- xt$idx
      if (!is.null(xt$mp)) mp <- xt$mp
    }
  } else stop("xt must be a character string or list")
  
  # Convert g to a list of functions
  if (is.character(g)) {
    if (g=="none")
      g <- list(function(t) t^0)
    else if (g=="linear")
      g <- list(function(t) t^0, function(t) t^1)
    else if (g=="quadratic")
      g <- list(function(t) t^0, function(t) t^1, function(t) t^2)
    else stop("Unrecognized character string for interaction functions g")
  } else if (is.function(g)) {
    g <- list(g)
  } else if (is.list(g)) {
    if (any(sapply(g, class) != "function"))
      stop("Incorrect input for interaction functions g")
  } else stop("Incorrect input for interaction functions g")
  
  # Extract tvar based on idx
  tvar <- data[[object$term[idx]]]
  dat  <- data[c(object$term[-idx])]
  
  # Set up design for t
  n_transform <- length(g)
  g_name <- paste0(object$term[idx], "_g_", 1:n_transform)
  g_X <-  vapply(g, function(f)
    do.call(f, list(t=tvar)), numeric(length(tvar)))
  
  # set up smooth over x
  args <- list(k=object$bs.dim, fx=object$fixed, m=object$p.order,
               xt=newxt, id=object$id, sp=object$sp)
  if (!is.null(bs)) args$bs <- bs
  callargs <- sapply(object$term[-idx], as.name)
  names(callargs) <- NULL
  smoothspec <- do.call(mgcv::s, append(callargs, args))
  if (object$by != "NA") {
    smoothspec$by <- object$by
    dat[object$by] <- data[object$by]
  }
  
  # sm will be the return object; sm0 is the reduced-dimension smooth
  sm = sm0 <- mgcv::smooth.construct(smoothspec, data = dat, knots = knots)
  
  # Modify sm:
  sm$term <- c(object$term[-idx], object$term[idx])
  sm$bs.dim <-  sm$bs.dim * n_transform
  sm$null.space.dim <-  sm$null.space.dim * n_transform
  sm$dim <- length(object$term)
  sm$label <- paste0("g(", object$term[idx], ")*", sm$label)
  sm$xt <- object$xt
  sm$X <- mgcv::tensor.prod.model.matrix(list(g_X, sm$X))
  if (mp) {
    sm$S <- lapply(1:n_transform, function(k) {
      diag(as.numeric((1:n_transform) == k)) %x% sm$S[[1]]
    })
    sm$rank <- sapply(sm$S, function(x) qr(x)$rank)
  } else {
    sm$S <- list(diag(n_transform) %x% sm$S[[1]])
  }
  if (!is.null(sm$df)) sm$df <- sm$df * n_transform
  sm$bs <- strsplit(class(smoothspec), ".", fixed=T)[[1]][1]
  sm$g <- g
  sm$sm <- sm0
  
  ## ctrl-c-v from smooth.construct.tp.smooth.spec:
  if (!is.null(sm$drop.null)) {
    if (sm$drop.null > 0) {
      M <- mgcv::null.space.dimension(sm$dim, sm$p.order[1])
      ind <- 1:(sm$bs.dim - sm$null.space.dim)
      if (FALSE) { ## nat param version
        np <- nat.param(sm$X, sm$S[[1]], rank=sm$bs.dim - sm$null.space.dim,
                        type=0)
        sm$P <- np$P
        sm$S[[1]] <- diag(np$D)
        sm$X <- np$X[,ind]
      } else { ## original param
        sm$S[[1]] <-sm$S[[1]][ind,ind]
        sm$X <-sm$X[, ind]
        sm$cmX <- colMeans(object$X)
        sm$X <- sweep(object$X, 2, object$cmX)
      }
      sm$null.space.dim <- 0
      sm$df <- sm$df - M
      sm$bs.dim <-sm$bs.dim -M
      sm$C <- matrix(0,0,ncol(object$X)) # null constraint matrix
    }
  }
  class(sm) <- "pi.smooth"
  sm
}



#' Predict.matrix method for pi basis
#' 
#' @param object a \code{pi.smooth} object created by
#'   \code{\link{smooth.construct.pi.smooth.spec}}, see
#'   \code{\link[mgcv]{smooth.construct}}
#' @param data  see \code{\link[mgcv]{smooth.construct}}
#' @return design matrix for PEER terms
#' @author Jonathan 
#' @export

Predict.matrix.pi.smooth <- function(object, data) {
  # Prediction method for parameteric bivariate basis
  
  # Separate tvar from data
  idx  <- length(object$term)
  tvar <- data[[object$term[idx]]]
  dat  <- data[-idx]
  
  # Set up design for t
  g_X <-  vapply(object$g, function(f)
    do.call(f, list(t=tvar)), numeric(length(tvar)))
  
  # Evaluate object$sm at new locations
  pmat <- mgcv::Predict.matrix(object = object$sm, data = dat)
  
  # Return tensor product with g_X
  mgcv::tensor.prod.model.matrix(list(g_X, pmat))
}
jgellar/mgcvTrans documentation built on Aug. 10, 2022, 4:02 p.m.