#' Build a splines model
#'
#' Given the coordinates of the observations, and the degree, this function puts
#' into place a sensible number of spline knots and computes the incidence
#' matrix B and the covariance matrix U
#'
#' Relies on \code{splines::splineDesign()}, which uses a C function call to
#' compute the splines coefficients.
#'
#' \code{sparse} matrices take less memory, but also take longer to compute
#' with. This is probably convenient only for really big datasets in comparison
#' with RAM size. The covariance matrix is always stored in sparse format, as it
#' is particularly sparse.
#'
#' @param coordinates matrix(-like) of observation coordinates
#' @param n.knots numeric. Vector of length two with an integer number of knots
#' in each dimension.
#' @param autofill logical. If \code{TRUE} (default) it will try to fill gaps in
#' the rows or columns. Otherwise, it will treat gaps the same way as adjacent
#' rows or columns.
#' @param degree integer. Degree of the B-spline polynomials.
#' @param sparse logical. If \code{TRUE} (default) the incidence matrix will be stored in
#' sparse format. Current implementation ignores a value of FALSE.
#' @param strategy character. Strategy for placing spline knots. Only
#' \code{uniformgrid} available for the moment.
#' @param ... Not used.
#'
#' @return A list with elements \code{incidence.matrix}, \code{structure.matrix}
#' and \code{structure.type}, which is a string indicating either
#' \code{covariance} or \code{precision}.
#' @importFrom methods as
breedr_splines <- function(coordinates,
n.knots = NULL,
autofill = TRUE,
degree = 3,
sparse = TRUE,
strategy = 'uniformgrid',
...) {
strategy <- match.arg(strategy)
strategy <- match.arg(strategy)
## Coordinates of knots in each dimension
if (strategy == 'uniformgrid') {
knots <- distribute_knots_uniformgrid(coordinates, n.knots, autofill)
}
B <- bispline_incidence(knots, coordinates, degree + 1, sparse)
# The sparse incidence matrix weights 3.5 less than the non-sparse version,
# but it takes longer processing time. Besides, the gmean computation it also
# takes way longer. And at the end, I need the dense incidence matrix anyway.
U1d <- lapply(vapply(knots, 'length', 0) - degree - 1, build.splines1d)
# U1d <- lapply(sapply(knots, length) - degree - 1, build.splines1d.sparse)
U <- kronecker(U1d[[1]], U1d[[2]])
# browser()
# Scaling so that the characteristic marginal variance equals 1/sigma^2
# Sorbye and Rue (2014)
sc <- suppressWarnings(
# if B has some row of zeroes (e.g. missing coordinate for an individual)
# then gmean warns about it and removes the 0 from computation.
# this is ok. but I don't want the warning to be displayed.
ifelse(sparse,
gmean(Matrix::diag(B %*% Matrix::tcrossprod(U, B))),
gmean(diag(B %*% tcrossprod(U, B))))
)
Uscaled <- as(U/sc, 'CsparseMatrix')
## Build the spatial effect, return the knot coordinates
## and further specify the splines class
ans <- spatial(coordinates, incidence = B, covariance = Uscaled)
ans$knots <- knots
ans$param <- list(n.knots = unname(vapply(knots, length, 1)))
ans$degree <- degree
class(ans) <- c('splines', class(ans))
return(ans)
}
#' Distribute knots uniformly in a grid
#'
#' For each dimension, places the given number of knots evenly spaced covering
#' the range of coordinates plus a small margin.
#'
#' The margin is calculated as half the median separation between observations.
#' Furthermore, three more knots are added with equal spacing at each side, for
#' each dimension.
#' @inheritParams breedr_splines
distribute_knots_uniformgrid <- function (coordinates, n.knots, autofill) {
# lattice of spatial locations
# possibly with automatic filling of empty rows or columns
obs.loc <- loc_grid(coordinates, autofill)
# Determine the number of (inner) knots for rows and columns
# or use the number provided by the user
if (is.null(n.knots))
n.knots <- determine.n.knots(sapply(obs.loc, 'length'))
# Spacing between rows/columns
obs.step <- sapply(obs.loc, function(x) summary(diff(x)))['Median',]
# Place the knots evenly spaced
# inspired by mgcv::place.knots()
# but adding a margin before and after the extreme observations
place.knots <- function(ol, os, nk) {
if( nk == 1) x <- sum(range(ol))/2
else x <- seq(head(ol, 1) - os/2, tail(ol, 1) + os/2, 'length' = nk)
return(x)
}
# Coordinates of inner knots for rows/columns
knots.inner <- mapply(place.knots,
obs.loc, obs.step, n.knots,
SIMPLIFY = FALSE)
# Add n.add additional knots before and after the inner knots
add.knots <- function(x.inner, n.add, coordinates) {
# Use the mean spacing between the inner knots
# or use the observations range if there are less than two inner knots
if( length(x.inner) > 1) {
spacing <- mean(diff(x.inner))
extremes <- range(x.inner)
# } else if( length(x.inner) == 1) {
# spacing <- diff(range(coordinates))/2
# extremes <- rep(x.inner, 2)
} else {
stop('At least two inner knots are needed for each dimension')
}
x <- c(extremes[1] - (n.add:1)*spacing,
x.inner,
extremes[2] + (1:n.add)*spacing)
return(x)
}
# Return row and column knots in a list
# as the numbers might be different
knots <- mapply(add.knots, knots.inner, 3, obs.loc, SIMPLIFY = FALSE)
# Include info about the strategy used as attributes
ans <- structure(knots,
strategy = 'uniformgrid',
n.knots = n.knots,
autofill = TRUE)
return(ans)
}
#' Determine a sensible number of knots
#'
#' This function computes a reasonable number of inner knots to be used for a
#' basis of unidimensional B-splines
#' @param n an integer vector of sample sizes
#' @param cutoff a numeric vector of cutoff values
#' @param rate a numeric vector of rates at which the number of knots increases with
#' the sample size
#' @return An integer vector with the number of knots for each sample size
#' @references \strong{Ruppert, D. (2002)}. Selecting the number of knots for penalized
#' splines. \emph{Journal of Computational and Graphical Statistics} 11,
#' 735–757.
determine.n.knots <- function(n, cutoff = 4, rate = 0.3) {
# # Inspired by hisemi::n.knots
# as.integer(trunc(pmin(n, cutoff + pmax(0, n - cutoff)^rate)))
# but taking into account that we will add three more knots at each side
if(any(n < 7)) stop('Too few data for a bidimensional spatial model')
nk <- trunc(pmin(n - 6, cutoff + pmax(0, n - 6 - cutoff)^rate))
return(nk)
}
# Build Covariance Matrix in 1D
build.splines1d <- function(n, model = 'GreenSilverman2003') {
# U matrix (Green & Silverman, 2003)
temp <- diag(4, n)
subdiag <- rbind(cbind(0, diag(1, n-1)), 0)
return((temp + subdiag + t(subdiag))/6)
}
build.splines1d.sparse <- function(n, model = 'GreenSilverman2003') {
# U matrix (Green & Silverman, 2003)
U <- Matrix::sparseMatrix(i = c(1:n, 1:(n-1)),
j = c(1:n, 2:n),
x = c(rep(4, n), rep(1, n-1))/6,
dims = c(n, n),
symmetric = TRUE)
return(U)
}
#' Incidence Matrix of Bidimensional Splines
#'
#' Compute the incidence matrix as a tensor product of B-spline bases, given the
#' knots, coordinates and order.
#'
#' Need at least 2*ord -1 knots (typically, 7) but in fact, we need at least
#' 2*ord unless we set outer.ok = TRUE in splineDesign (which we do not want)
#'
#' @param knots list with numeric vectors of knot positions with non-decreasing
#' values for each dimension
#' @param xx 2-column matrix of coordinates at which to evaluate the
#' bidimensional splines
#' @param ord integer order of the spline functions (equals degree + 1)
#' @param sparse logical indicating if the result should be given in sparse
#' format
#'
#' @references \strong{Eilers, P H C and B D Marx (2003)}. Multivariate calibration with
#' temperature interaction using two-dimensional penalized signal regression.
#' \emph{Chemometrics and Intelligent Laboratory Systems} 66(2), 159-174.
bispline_incidence <- function (knots, xx, ord, sparse) {
## Account for missing coordinates
## setting corresponding incidence values to zero
miss.idx <- list(x = is.na(xx[, 1]),
y = is.na(xx[, 2]))
if (sparse) {
b.x <- Matrix(0, nrow(xx), length(knots[[1]]) - ord)
b.y <- Matrix(0, nrow(xx), length(knots[[2]]) - ord)
} else {
b.x <- matrix(0, nrow(xx), length(knots[[1]]) - ord)
b.y <- matrix(0, nrow(xx), length(knots[[2]]) - ord)
}
b.x[!miss.idx$x] <-
splines::splineDesign(knots[[1]], xx[!miss.idx$x, 1], ord = ord) #, sparse=TRUE)
b.y[!miss.idx$y] <-
splines::splineDesign(knots[[2]], xx[!miss.idx$y, 2], ord = ord) #, sparse=TRUE)
# The argument sparse was introduced at some point between versions 2.15.0
# and 3.0.2 of the splines package.
# sparseness is useful but unnecessary. I prefer to keep this
# more widely compatible, and make things sparse myself.
ones.y <- matrix(1, ncol = ncol(b.y))
ones.x <- matrix(1, ncol = ncol(b.x))
B <- suppressMessages(kronecker(b.x, ones.y)*kronecker(ones.x, b.y))
return(B)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.