##
## TODO: use new.env(parent=emptyenv())
## Use ::: to use hidden functions
## Delete function in gspline
##
## General parametric polynomial splines: March 10, 2019
##
## Note that Cmat, Xf, Dmat, Xmat, basis, Pcon
## are unexported functions that are used in the
## vignette explaining the principles behind
## gspline.
##
## They are also used within gspline and Xf and Xmat
## are also explictitly added to the environment
## of the spline function created by gspline,
##
##
#' General parametric regression splines with variable degrees and smoothness
#'
#' This function creates functions (closures) that implement general polynomial splines
#' with possibly different degrees
#' in each interval and different orders of smoothness at each knot,
#' including the possibility of allowing a discontinuity at a knot.
#'
#' The functions created by \code{gspline} can also be used to generate portions
#' of a linear hypothesis matrix that can be used with a Wald test
#' to estimate features of the spline such as derivatives of various orders,
#' discontinuities, etc.
#'
#' Many polynomial regression splines can be generated by 'plus' functions
#' although the resulting basis for the spline may be ill conditioned. For
#' example, a 'quadratic spline' (a spline that is quadratic in each interval
#' with and smooth with a first derivative at each knot) with knots at 1 and 3
#' can be fitted with:
#'
#' \code{ plus <- function(x,y) ifelse(x>0,y,0)}\cr
#' \code{ lm(y ~ x + I(x^2) + plus(x-1,(x-1)^2) + plus(x-3,(x-3)^2))}
#'
#' All 'standard' polynomial splines with the same
#' degree in each interval and continuity of
#' order one less than the degree at each knot can be constructed in this
#' fashion. A convenient aspect of this parametrization of the spline is that
#' the estimated coefficients have a simple interpretation. The coefficients
#' for 'plus' terms represent the 'saltus' (jump) in the value of a coefficient
#' at the knot. Testing whether the true value of the saltus in a coefficient is 0 is
#' equivalent to a test for the need to retain the corresponding knot.
#'
#' This approach does not work for some more complex splines with different
#' degrees or different orders of continuity at the knots.
#'
#' Many techniques for fitting splines generate a basis for the spline
#' (columns of the design matrix) that has good numerical properties but
#' whose coefficients do not have a simple interpretation.
#'
#' The \code{gspline} function generates functions representing splines
#' with arbitrary degrees in each interval and
#' arbitrary smoothness at each knot. Any set of orders of derivatives can
#' be contrained to be continuous at any particular knot.
#' The parametrization produces coefficients
#' that have a simple interpretation. For a spline of degree p at x = 0,
#' coefficients correspond to the 1st, 2nd, ... pth derivative at 0. Additional
#' coefficients correspond to each free discontinuities at each knot.
#'
#' A disadvantage of spline functions generated by \code{gspline}
#' is that the spline basis may
#' be poorly conditioned. The impact of this problem can be mitigated by
#' rescaling the x variable so that it has an appropriate range. For example,
#' with a spline whose highest degree is cubic, ensuring that x has a range
#' between -10 and 10 should avert numerical problems.
#'
#' Let \code{sp} denote a spline matrix created by \code{gspline}. For example
#' \code{sp <- gspline(knots = c(-1,0,2), degree = c(2,3,3,2), smoothness = c(1,-1,1))}
#' creates a spline function \code{sp} that can be used in linear formulas to
#' fit a polynomial spline with knots at -1, 0 and 2, with degrees 2, 3, 3, and 2 in
#' the four intervals bounded by the knots and with "C 1" continuity (continuous
#' first derivative) at the knots -1 and 2, and a possible discontinuity in value
#' at the knot 0.
#'
#' The linear formula to fit this spline has the form \code{y ~ 1 + sp(x)} which can
#' be augmented with other regressors in the usual way.
#'
#' Called with a single argument (e.g. \code{sp(x)}) \code{sp}
#' returns a matrix of regressors and can be used in linear model formulas,
#' e.g \code{y ~ sp(x)}, \code{y ~ A * sp(x)} or \code{y ~ A / sp(x) -1}
#' if \code{A} is a factor to fit separate splines
#' within each level of \code{A}.
#'
#' \code{sp} can be called with additional
#' argument to geneate portions of linear hypothesis matrices that
#' can be used test or estimate various values or derivatives of the spline. For
#' example, \code{sp(c(-1,-1,-1,0,2), D = c(2, 2, 2, 0, 1), limit = c(-1, 1, 0, 0, 1))}
#' will return a matrix of coefficients to estimate, respectively, the limits from the
#' left and from the right of the second derivative at -1, the difference (jump or saltus)
#' in this derivative at -1, the size of the discontinuity in the value of the function
#' at 0, and the first derivative at 1, which being continuous there has the same value
#' whether a limit is taken from the right or from the left.
#'
#' A function to fit a cubic spline with knots at 5 and 10 is generated with
#' \code{sp <- gspline(knots = c(5, 10), degree = c(3, 3, 3), smoothness = c(2, 2))}, or more
#' briefly: \code{sp <- gspline(c(5, 10), 3, 2)} since the degree and smoothness arguments
#' are recycled as required by the number of knots.
#'
#' A natural cubic spline with knots at 5 and 10 and boundary knots at 0 and 20 can be
#' created with
#' \code{sp <- gspline(c(0, 5, 10, 20), c(1, 3, 3, 3, 1), c(2, 2, 2, 2))}.
#'
#' A step function with steps at 0, 1 and 2 would have the form:
#' \code{sp <- gspline(c(0, 1, 2), 0, -1)}.
#'
#' @param knots vector of knots
#' @param degree vector giving the degree of the spline in each interval. Note
#' the number of intervals is equal to the number of knots + 1. A value of 0
#' corresponds to a constant in the interval.
#' @param smoothness vector with the degree of smoothness at each knot (0 =
#' continuity, 1 = smooth with continuous first derivative, 2 = continuous
#' second derivative, etc. The value -1 allows a discontinuity at the knot.
#' Alternative, a list each of whose elements specifies the derivatives
#' that must be continuous at each knot. This allow the set of continuous
#' derivatives to skip a value. For example, to specify that a function
#' can have a discontinuity but must have the same limiting slope and second
#' derivative (curvature) on either side
#' of the discontinuity, the smoothness vector for the corresponding knot would have the
#' value \code{c(1,2)}, thus leaving the value (denoted by 0) unconstrained.
#' @param periodic if TRUE a periodic spline is generated on the base interval
#' [0,max(knots)]. A constraint is generated so that the coefficients generate, in effect,
#' the same fitted values and derivatives to the right of max(knots) as they do to the right of 0.
#' Note that all knots must be strictly positive. Since the knot at 0 corresponds to the
#' knot at 'max(knots)', the knot at 0 must not be explicitly specified. Note also that the
#' degree in the interval to the right of 'max(knots)' must be equal to that in the
#' interval to the left of 'min(knots)'. If these are specified to be equal,
#' \code{gspline} uses the maximum for both intervalsl.
#' The continuity contraints associated
#' with the last knot are, in effect, continuity constraints at 0.
#' @param constraints provides a vector or matrix specifying additional linear contraints on the
#' 'full' parametrization consisting of blocks of polynomials of degree equal
#' to max(degree) in each of the length(knots)+1 intervals of the spline. See
#' below for examples of a spline that is 0 outside of its boundary knots.
#' @param estimates provides a vector or matrix specifying additional linear combination of
#' the parameters in the
#' 'full' parametrization that should be estimated by estimated coefficients of the spline.
#' @param signif number of significant digits used to label coefficients
#' @param exclude (not yet implemented) number of leading columns to drop from spline matrix: 0:
#' excludes the intercept column, 1: excludes the linear term as well. Terms
#' that are excluded from the spline matrix can be modeled explicitly.
#' @param a (not yet implemented) a 'periodic' knot at which a spline repeats itself
#' @param n (not yet implemented) the maximal 'order', i.e. maximal degree + 1, of a periodic spline
#' @param x (parameter of the closure created by \code{gspline}) value(s) where the
#' spline is evaluated. This is the only parameter used when generating portions of
#' a model matrix.
#' @param D (parameter of the closure created by \code{gspline}) value(s) the order of
#' derivatives to be estimated when creating a linear hypothesis matrix. The default
#' is \code{D=0} corresponding to the value of the spline.
#' @param limit (parameter of the closure created by \code{gspline}) specifies
#' whether a derivative or value should be estimated as as limit from the left
#' \code{limit = -1}, from the right, \code{limit = -1}, or the difference of the
#' two limits, \code{limit = -0}. The default is \code{limit = -1}.
#' @return \code{gspline} returns a closure that creates portions of model matrices
#' or of linear hypothesis matrices for Wald test for a general polynomial spline.
#'
#' @section Warning: The estimands generated by spline functions created
#' by \code{gspline} are designed so the
#' coefficients are interpretable as changes in derivatives at knots. The
#' resulting matrix is not designed to have optimal numerical properties.
#'
#' @examples
#' ## Fitting a quadratic spline
#' simd <- data.frame( age = rep(1:50, 2), y = sin(2*pi*(1:100)/5) + rnorm(100),
#' G = rep( c('male','female'), c(50,50)))
#' # define a function generating the spline quadratic spline with linear extrapolation
#' sp <- gspline(knots = c(10,25,40), degree = c(1,2,2,1),
#' smooth = c(1,1,1))
#'
#' fit <- lm( y ~ sp(age)*G, simd)
#'
#' require(lattice)
#' xyplot( predict(fit) ~ age, simd, groups = G, type = 'l')
#' summary(fit)
#'
#' fitg <- update(fit, y ~ G/sp(age) - 1)
#' summary(fitg)
#'
#' ## Linear hypotheses
#' L <- list("Overall test of slopes at 20" = rbind(
#' "Female slope at age 20" = c( F20 <- cbind( 0 , sp(20, D = 1), 0 , 0 * sp(20, D = 1))),
#' "Male slope at age 20" = c( M20 <- cbind( 0 , sp(20, D = 1), 0 , 1 * sp(20, D = 1))),
#' "Difference" = c(M20 - F20))
#' )
#' wald(fit, L)
#'
#' ## Right and left second derivatives at knots and saltus (jump)
#'
#' L <- list("Second derivatives and saltus for female curve at knot at 25" =
#' cbind( 0, sp(c(25,25,25), D = 2, limit =c(-1,1,0)), 0,0,0,0))
#' L
#' wald(fit, L)
#'
#' ##
#' ## Second derivatives and saltus for female curve at knot at 25
#' ## numDF denDF F.value p.value
#' ## 2 92 2.289052 0.10711
#' ## Estimate Std.Error DF t-value p-value Lower 0.95 Upper 0.95
#' ## D2|25- 0.004942 0.006258 92 0.789720 0.43172 -0.007487 0.017372
#' ## D2|25+ -0.010879 0.006011 92 -1.809958 0.07357 -0.022817 0.001059
#' ## D2|25+-D2|25- -0.015822 0.011665 92 -1.356298 0.17832 -0.038990 0.007347
#'
#' L0 <- list(
#' "hat" = rbind(
#' "females at age=20" = c( 1, sp(20), 0, 0 * sp(20)),
#' "males at age=20" = c( 1, sp(20), 1, 1* sp(20))),
#' "male-female" = rbind(
#' "at 20" = c( 0 , 0*sp(20), 1, 1*sp(20))))
#' wald(fit, L0)
#'
#' L1 <- list(
#' "D(yhat)/D(age)" =
#' rbind( "female at age = 25" = c(0, sp(25,1), 0, 0*sp(25,1)),
#' "male at x = 25" = c(0, sp(25,1), 0, 1*sp(25,1))))
#' wald( fit, L1)
#'
#' # Cubic spline:
#'
#' sp <- gspline(c(5,10), c(3,3,3), c(2,2))
#'
#' # The parameters indicate that a cubic polynomial is used in each of the three intervals
#' # and that the second derivative is continuous at each knot.
#'
#' # Cubic natural spline:
#' # is a cubic spline in bounded intervals with linear components
#' # in each unbounded interval and continuous first derivative at the
#' # two knots for unbounded intervals.
#'
#' sp.natural <- gspline(c(0,5,10,15), c(1,3,3,3,1), c(2,2,2,2))
#'
#' # Quadratic and linear splines and step functions, respectively:
#'
#' sp.quad <- gspline(c(5,10), c(2,2,2), c(1,1))
#' sp.lin <- gspline(c(5,10), c(1,1,1), c(0,0))
#' sp.step <- gspline(c(5,10), c(0,0,0), c(-1,-1))
#'
#' # When the same degree is used for all
#' # intervals and knots, it suffices to give it once:
#'
#' sp.quad <- gspline(c(5,10), 2, 1)
#' sp.lin <- gspline(c(5,10), 1, 0)
#' sp.step <- gspline(c(5,10), 0, -1
#'
#' # An easy way to specify a model in which a knot is dropped
#' # is to force a degree of continuity equal to the degree of adjoining
#' # polynomials, e.g. to drop the knot at 10, use:
#'
#' sp.1 <- gspline(c(5,10), c(3,3,3), c(2,3))
#'
#' # This is sometimes easier than laboriously rewriting the
#' # spline function for each null hypothesis.
#'
#' # Depending on the maximal degree of the spline, the range of x should not be
#' # excessive to avoid numerical problems. The spline matrix generated is 'raw'
#' # and values of max(abs(x))^max(degree) may appear in the matrix. For
#' # example, for a cubic spline, it might be desirable to rescale x and/or
#' # recenter x so abs(x) < 100 if that is not already the case. Note that the
#' # knots need to be correspondingly transformed.
#'
#' # The naming of coefficients should allow them to be easily interpreted. For
#' # example:
#'
#' sp <- gspline(c(3,7), c(2,3,2), c(1,1))
#' sp(0:10, c(1,1))
#'
#' ## Output:
#' ## D1|0 D2|0 C2|3 C3|3 C2|7
#' ## D1|0 1 0 0 0.0 0
#' ## D1|1 1 1 0 0.0 0
#' ## D1|2 1 2 0 0.0 0
#' ## D1|3 1 3 0 0.0 0
#' ## D1|4 1 4 1 0.5 0
#' ## D1|5 1 5 2 2.0 0
#' ## D1|6 1 6 3 4.5 0
#' ## D1|7 1 7 4 8.0 0
#' ## D1|8 1 8 5 12.0 1
#' ## D1|9 1 9 6 16.0 2
#' ## D1|10 1 10 7 20.0 3
#'
#' # The coefficient for the first regressor is the first derivative at x = 0;
#' # for the second regressor, the second derivative at 0; the third, the saltus
#' # (change) in the second derivative at x = 3, the fourth, the saltus in the
#' # third derivative at x = 3 and, finally, the saltus in the second derivative
#' # at x = 7.
#'
#' sp <- gspline (c(3, 7) , c(2,3,2), c(1,2))
#' zd <- data.frame( x = seq(0,10, .5),
#' y = seq(0,10,.5)^2 + rnorm(21))
#' fit <- lm( y ~ sp( x ), zd)
#' summary(fit)
#' Ls <- cbind(0, sp(c(1,2,3,3,3,5,7,7,7,8),
#' D = 2, limit =c(-1,-1,-1,1,0,-1,-1,1,0,-1)))
#' zapsmall(Ls )
#' #
#' # Note that estimates of features that are continuous at a point
#' # are indicated without a direction for the limit, e.g. "D2(1)"
#' # is the second derivative at 1.
#' ##
#' ## Output:
#' ##
#' ## D1|0 D2|0 C2|3 C3|3
#' ## D2|1 0 0 1 0 0
#' ## D2|2 0 0 1 0 0
#' ## D2|3- 0 0 1 0 0
#' ## D2|3+ 0 0 1 1 0
#' ## D2|3+-D2|3- 0 0 0 1 0
#' ## D2|5 0 0 1 1 2
#' ## D2|7 0 0 1 1 4
#' ## D2|7 0 0 1 1 4
#' ## D2|7+-D2|7- 0 0 0 0 0
#' ## D2|8 0 0 1 1 4
#'
#' wald( fit, list( 'second derivatives' = Ls))
#'
#' # Note that some coefficients that are 0 by design may lead to invalid degrees
#' # of freedom and t-values.
#'
#' @export
gspline <- function(
knots,
degree = 3,
smoothness = pmax(
pmin(degree[-1],degree[-length(degree)]) - 1, 0),
intercept = 0,
constraints = NULL,
estimates = NULL,
periodic = FALSE,
tolerance = 1e-14,
debug = FALSE
) {
basis <- function(X, tol = 1e-9) {
# returns linear independent columns
# with possible pivoting
q <- qr(X, tol = tol)
sel <- q$pivot[seq_len(q$rank)]
ret <- X[, sel, drop = FALSE]
colnames(ret) <- colnames(X)[sel]
ret
}
Cmat <-
function( knots, degree, smoothness, lin = NULL, intercept = 0, signif = 3) {
# GM 2013-06-13
# add lin: contraints,
# generates constraint matrix
# GM: 2019_02_22: added facility to input smoothness as a list
# for smoothness constraints that don't have the form 0:smoothness[[i]]
dm = max(degree)
# intercept
cmat = NULL
if( !is.null(intercept)) cmat = rbind( cmat, "Intercept" =
Xf( intercept, knots, dm, D=0 ))
# continuity constraints
for ( i in seq_along(knots) ) {
k <- knots[i]
sm <- smoothness[[i]]
if ( max(sm) > -1 ) { # sm = -1 corresponds to discontinuity
if(!is.list(smoothness)) sm <- 0:sm
dmat <- Xf( k, knots, dm, D = sm, F ) -
Xf( k, knots, dm, D = sm, T )
rownames( dmat ) <- paste0("C", sm, '|', signif(k, signif))
cmat <- rbind( cmat, dmat)
}
}
# reduced degree constraints
degree <- rep( degree, length.out = length(knots) + 1)
for ( i in seq_along( degree)) {
di <- degree[i]
if ( dm > di ) {
dmat <- diag( (length(knots) + 1) * (dm +1)) [ (i - 1)*(dm + 1) +
1 + seq( di+1,dm), , drop = F]
rownames( dmat ) = paste( "I.", i,".",seq(di+1,dm),sep = '')
cmat = rbind( cmat, dmat)
}
}
# add additional linear constraints
if( ! is.null(lin)) cmat <- rbind(cmat,lin) # GM:2013-06-13
rk = qr(cmat)$rank
spline.rank = ncol(cmat) - rk
attr(cmat,"ranks") = c(npar.full = ncol(cmat), C.n = nrow(cmat),
C.rank = rk , spline.rank = spline.rank )
attr(cmat,"d") = svd(cmat) $ d
cmat
}
Emat <- function(knots, degree, smoothness , intercept = FALSE, signif = 3) {
if (length(degree) < length(knots) + 1) degree <- rep( degree,
length.out = length(knots) + 1)
dmin <- min(degree)
dmax <- max(degree)
imax = length(degree)
# derivatives for changes to estimate
# Quick temporary fix if smoothness is a list
# - generate extra terms that will be dropped due to collinearity with constraints
if(is.list(smoothness)) smoothness <- rep(-1, length(knots))
zeroi = as.numeric(cut( 0, c(-Inf, sort(knots), Inf))) # index of interval containing 0
dzero = degree[zeroi] # degree of interval containing 0
cmat = Xf(0, knots, degree = dmax, D = seq( 1, dzero)) # derivatives at 0 - may be discarded
if ( imax > zeroi ){ # intervals to the right of the interval containing 0
for ( i in (zeroi+1): imax) {
d.right = degree[ i ]
d.left = degree[ i-1 ]
k = knots[ i - 1 ]
sm = smoothness[ i - 1 ]
if ( d.right > sm ) {
dmat =Xf( k , knots, dmax, D = seq(sm+1,d.right) , F ) -
Xf( k , knots, dmax, D = seq(sm+1,d.right) , T )
# rownames( dmat ) = paste( "C", seq(sm+1,d.right) , ,signif(k, signif),").",
# , sep = '')
rownames( dmat ) = paste0( "C", seq(sm+1, d.right),'|', signif(k, signif))
cmat = rbind( cmat, dmat)
}
}
}
if ( zeroi > 1 ){
for ( i in zeroi: 2) {
d.right = degree[ i ]
d.left = degree[ i-1 ]
k = knots[ i - 1 ]
sm = smoothness[ i - 1 ]
if ( d.left > sm ) {
dmat = Xf( k , knots, dmax, D = seq(sm+1,d.left) , F ) -
Xf( k , knots, dmax, D = seq(sm+1,d.left) , T )
rownames( dmat ) = paste0( "C", seq(sm+1, d.left),'|', signif(k, signif))
cmat = rbind( cmat, dmat)
}
}
}
cmat
}
Xmat <- function(x,
degree,
D = 0,
signif = 3) {
# Returns rows of design matrix if D = 0
# or linear hypothesis matrix for D-th derivative
if (length(D) < length(x))
D = rep(D, length.out = length(x))
if (length(x) < length(D))
x = rep(x, length.out = length(D))
xmat = matrix(x, nrow = length(x), ncol = degree + 1)
expvec <- 0:degree
coeffvec <- rep(1, degree + 1)
expmat <- NULL
coeffmat <- NULL
for (i in 0:max(D)) {
expmat <- rbind(expmat, expvec)
coeffmat <- rbind(coeffmat, coeffvec)
coeffvec <- coeffvec * expvec
expvec <- ifelse(expvec > 0, expvec - 1, 0)
}
X = coeffmat[D + 1,,drop = FALSE] * xmat ^ expmat[D + 1,, drop = FALSE]
xlab = signif(x, signif)
rownames(X) = ifelse(D == 0,
paste('f(', xlab , ')', sep = ''),
paste0("D", D, "|", xlab)) # new style
colnames(X) = paste("X", 0:(ncol(X) - 1), sep = "")
class(X) <- c('gspline_matrix',class(X))
X
}
Xf <-
function(x,
knots,
degree = 3,
D = 0,
right = TRUE,
periodic = FALSE,
signif = 3) {
# Returns block diagonal matrix (if x ordered)
# with design matrix or linear hypothesis matrix
# for the 'full' model with contrained parameters.
#
# With the default, right == TRUE,
# if x is at a knot then it is included in
# in the lower interval.
# With right = FALSE, it is included in the higher
# interval. This is needed when building
# derivative constraints at the knot
if(periodic) {
period <- max(knots)
xx <- x %% period
if(right) xx[xx==0] <- period
x <- xx
knots <- knots[-length(knots)]
}
xmat = Xmat (x, degree, D , signif)
k = sort(knots)
g = cut(x, c(-Inf, k, Inf), right = right)
ret <- do.call('cbind',
lapply(seq_along(levels(g)), function(i)
(g == levels(g)[i]) * xmat))
if(periodic) rownames(ret) <- paste0(rownames(ret), '/',signif(period, signif)) # new style
class(ret) <- c('gspline_matrix',class(ret))
ret
}
#
# Value and derivatives at 0
#
# And all discontinuities at knots
#
Dmat <- function(knots, degree, periodic = FALSE, signif = 3) {
dm <- max(degree)
cmat <- Xf(0, knots, dm, D=0:dm, periodic = periodic)
n_knots <- length(knots)
for (i in seq_len(n_knots - 1) ) {
k <- knots[i]
dmat <- Xf(k, knots, dm, D = seq(0,dm), F, periodic = periodic) -
Xf(k, knots, dm, D = seq(0,dm), T, periodic = periodic)
# rownames( dmat ) <- paste( "C(",signif(k, signif),").",
# seq(0,dm), sep = '')
rownames( dmat ) <- paste0( "C",seq(0,dm),'|',signif(k, signif)) # new style for derivatives
if(periodic) rownames(dmat) <- paste0(rownames(dmat),'/',signif(max(k),signif))
cmat <- rbind( cmat, dmat)
}
k <- knots[length(knots)]
if(periodic) {
dmat <- Xf(0, knots, dm, D = seq(0,dm), F, periodic = periodic) -
Xf(k, knots, dm, D = seq(0,dm), T, periodic = periodic)
# rownames( dmat ) <- paste( "C(0 mod ",signif(k, signif),").",
# seq(0,dm), sep = '')
rownames( dmat ) <- paste( "C", seq(0,dm),'|', signif(k, signif),'/',signif(max(k), signif)) # new style for derivatives
cmat <- rbind(cmat, dmat)
} else {
dmat <- Xf(k, knots, dm, D = seq(0,dm) , F ,periodic = periodic) -
Xf(k, knots, dm, D = seq(0,dm) , T ,periodic = periodic)
# rownames( dmat ) <- paste( "C(",signif(k, signif),").",
# seq(0,dm), sep = '')
rownames( dmat ) <- paste0( "C",seq(0,dm),'|',signif(k, signif)) # new style for derivatives
cmat <- rbind(cmat, dmat)
}
class(cmat) <- c('gspline_matrix',class(cmat))
cmat
}
#
# Parameters to constrain
#
# TODO: Change so degree can be a list
#
Pcon <- function(knots, degree, periodic) {
degree <- rep( degree, length.out = length(knots) + 1)
if(periodic) {
if(degree[length(degree)] != degree[1]) warning("For periodic splines, the degree of the last and first intervals should match")
knots <- knots[-length(knots)]
degree <- degree[-length(degree)]
}
dm <- max(degree)
cmat <- NULL
for ( i in seq_along(degree)) {
di <- degree[i]
if ( dm > di ) {
dmat <- diag( (length(knots) + 1) * (dm +1)) [
(i - 1)*(dm + 1) + 1 + seq( di+1,dm), , drop = F]
rownames( dmat ) = paste( "I.", i,".",seq(di+1,dm),sep = '')
cmat = rbind( cmat, dmat)
}
}
if(!is.null(cmat)) class(cmat) <- c('gspline_matrix',class(cmat))
cmat
}
#
# Parse knots, degree and smoothness to create
# constraint and estimation matrices:
# - Identify rows of Dmat that are used for contraints
# and rows used for estimates
if(periodic) {
if(min(knots) <= 0) stop('For a periodic spline all knots must be positive')
}
if(any(knots != sort(knots))) stop('Knots must be in ascending order')
degree <- rep(degree, length.out = length(knots) + 1)
max_degree <- max(degree)
smoothness <- rep(smoothness, length.out = length(knots))
if(!is.list(smoothness)) smoothness <-
lapply(smoothness, function(n) if(n<0) -1 else 0:n)
Dmat_smoothness_indices <-
unlist(
lapply(seq_along(smoothness), function(i)
smoothness[[i]] + (max_degree + 1) * i + 1)
)
Dmat_smoothness_indices <- Dmat_smoothness_indices[unlist(smoothness) > -1]
# Build constraints
constraint_mat <- Dmat(knots, degree, periodic = periodic)[Dmat_smoothness_indices, ,drop = F]
constraint_mat <- rbind(constraint_mat, Pcon(knots, degree, periodic = periodic))
if(!is.null(constraints))
constraint_mat <- rbind(constraint_mat, constraints)
if(!is.null(intercept)) {
constraint_mat <-
rbind(constraint_mat, Xf(intercept, knots, max_degree, periodic = periodic))
Dmat_smoothness_indices <- c(1,Dmat_smoothness_indices)
}
cmat <- t(basis(t(constraint_mat)))
estimate_mat <-
rbind(estimates,
Dmat(knots, degree, periodic = periodic)[-Dmat_smoothness_indices, ,drop = FALSE])
# the following is necessary in case some of the derivatives
# estimated at 0 have been coerced to 0 by other constraints
estimate_mat <- t(basis(t(rbind(constraint_mat, estimate_mat))))
estimate_mat <- estimate_mat[-seq_len(nrow(constraint_mat)),,drop = FALSE]
emat <- estimate_mat
A <- rbind(constraint_mat, estimate_mat)
if(debug) svs <- svd(A,nu=0,nv=0)
G <- solve( rbind(constraint_mat, estimate_mat),
diag(ncol(constraint_mat))[,-seq_len(nrow(constraint_mat)), drop = FALSE])
colnames(G) <- rownames(estimate_mat)
#
# create closure
#
fun <- function(x, D = NULL, limit = 1) {
if(is.null(D)) {
ret <- Xf(x, knots, max_degree, periodic = periodic) %*% G # model matrix
} else {
# Hypothesis matrix
D <- rep(D, length.out = length(x))
limit <- rep(limit, length.out = length(x))
left <- Xf(x,
knots = knots,
degree = max_degree,
D = D,
periodic = periodic,
right = TRUE)
right <- Xf(x,
knots = knots,
degree = max_degree,
D = D,
periodic = periodic,
right = FALSE)
cleft <- c(1,0,-1)[ match(limit, c(-1,1,0)) ]
cright <- c(0,1,1)[ match(limit, c(-1,1,0)) ]
# disp((right - left) %*% G)
continuous <- apply((right - left) %*% G, 1, function(x) all(abs(x) < 10 * tolerance))
# disp(continuous)
raw <- left * cleft + right * cright
ret <- raw %*% G
nam <- rownames(ret)
nam <- sub("^f","g", nam)
# nam_left <- sub("\\)","-)", nam)
# nam_right <- sub("\\)","+)", nam)
nam_left <- sub("(/|$)","-\\1", nam) # new style
nam_right <- sub("(/|$)","+\\1", nam) # new style
nam_jump <- paste0(nam_right ,"-", nam_left)
#
# If we're testing for a jump and the derivative is
# continuous we should show the identity of the
# jump tested.
#
# If we're testing for a derivative at a knot
# and the derivative is continuous, we should
# indicate that by removing the direction indicator
#
# So:
# If we're testing a jump we use the full name of
# the linear combination even if it happens to be
# at a point of continuity
# Otherwise, used the directional notation only
# if the tested derivative is discontinuous
rownames(ret) <-
ifelse(limit == 0,
nam_jump,
ifelse(continuous, nam,
ifelse(limit == 1, nam_right, nam_left)))
}
class(ret) <- c('gspline_matrix', class(ret))
ret
}
# Modify colnames of G matrix to show direction of limit
# if there is a discontinuity of derivatives at origin
if(!is.null(intercept)) {
dest <- fun(rep(intercept,max(degree)+1), D = 0:max(degree), limit = -1)
tos <- grep('-$|-/',rownames(dest), value = TRUE)
froms <- sub('([0-9])-','\\1', tos)
froms <- sub('(.*)','^\\1$', froms)
froms <- sub('\\|','\\\\|', froms)
for(i in seq_along(tos)) {
colnames(G) <- sub(froms[i], tos[i], colnames(G))
}
}
class(fun) <- c('gspline', class(fun))
fun
}
#' @rdname gspline
#' @export
print.gspline <- function(x,
show = c('knots','degree','smoothness','G','constraint_mat','estimate_mat')) {
cat('Spline function created by gspline\n')
ret <- (Filter(Negate(is.function), sapply(ls(environment(x)), get, environment(x))))
ret <- lapply(ret, function(x) if(is.numeric(x)) zapsmall(x) else x)
if(!is.null(show)) ret <- ret[show]
print(ret)
invisible(ret)
}
#' @rdname gspline
#' @export
print.gspline_matrix <- function(x, ...) {
x <- zapsmall(x)
NextMethod()
}
#' @rdname gspline
#' @export
knots.gspline <- function(sp) {
environment(sp)$knots
}
if(FALSE) {
sp <- gspline(c(-1,0,1),3,2)
sp(c(-1,0,2,3))
sp(c(-1,-1,-1,0,2,3),2,limit=c(-1,0,1,0,0,0))
print(sp)
sp <- gspline(c(-2,0,2),3,1)
sp(seq(-1,2))
sp(seq(-1,2),1)
sp(c(0,0,0,0),c(0,1,2,3))
sp(c(0,0,0,0),c(0,1,2,3), limit = -1)
# test periodic splines
zd <- data.frame(x=-10:10)
zd <- within(zd, y <- x + abs(x) + .01*rnorm(x))
sp0 <- gspline(0,1,0)
fit <- lm(y ~ sp0(x), zd)
summary(fit)
print(sp)
# problem?: parameters estimate from left, not right
zd <- within(zd, yd <- x + abs(x) + I(x > 0) + .01 * rnorm(x))
spd <- gspline(0,1, -1)
fit <- lm(yd ~ spd(x), zd)
summary(fit)
spd(c(0,0,0,0,0,0), D=c(0,0,0,1,1,1), limit = c(-1,0,1,-1,0,1)) %>%
{Lmat <- .}
wald(fit, cbind(0,Lmat))
plot(yd ~ x, zd)
print(sp)
# both level and slope are limits from the left:
# probably should leave that way because it's easier
# to add change than to go back
# TODO: need to add limit directions on G matrix
load("../data/Unemp.RData", verbose = T)
plot(unemployment ~ date, data=Unemp, type="b")
dim(Unemp)
dd <- Unemp
dd$y <- dd$unemployment
dd$x <- 1:nrow(dd)
f1 <- function(x) cbind(Sin=sin(2*pi*x/12),Cos=cos(2*pi*x/12)) # fundamental
f2 <- function(x) structure(f1(x/2), names = c('Sin2','Cos2'))
f3 <- function(x) structure(f1(x/3), names = c('Sin3','Cos3'))
f4 <- function(x) structure(f1(x/4), names = c('Sin4','Cos4'))
per <- gspline(c(3,6,9,12),c(1,2,2,2,1),0, periodic = TRUE)
per2 <- gspline(c(3,6,9,12),2,1, periodic = TRUE)
per3 <- gspline(c(3,6,9,12),3,2, periodic = TRUE)
per4 <- gspline(c(3,6,9,12),4,3, periodic = TRUE)
perstep <- gspline(c(3,6,9,12),0,-1, periodic = TRUE)
per
fits <- list(
hetero = lm(y ~ per(x), dd),
quad = lm(y ~ per2(x), dd),
cubic = lm(y ~ per3(x), dd),
quartic = lm(y ~ per4(x), dd),
step = lm(y ~ perstep(x), dd),
f1 = lm(y ~ f1(x), dd),
f2 = lm(y ~ f1(x) + f2(x), dd),
f3 = lm(y ~ f1(x) + f2(x) + f3(x), dd),
f4 = lm(y ~ f1(x) + f2(x) + f3(x) + f4(x), dd)
)
fits %>% lapply(summary)
fits %>% sapply(AIC)
pred <- list(x=seq(0,24,.01))
plot(c(0,24),c(6.5,8.5), type = 'n')
fits %>% lapply(function(f) lines(pred$x,predict(f, newdata = pred)))
sp <- gspline(nrow(dd)*1:7/8,2,1)
sp3 <- gspline(nrow(dd)*1:7/8,3,2)
fit <- lm(y ~ per3(x) +sp(x), dd)
fit3 <- lm(y ~ per3(x) +sp3(x), dd)
fit4 <- lm(y ~ per3(x) +sp3(x), dd)
with(dd, {
plot(date,y, pch = '.',cex = 2)
lines(date, predict(fit))
lines(date, predict(fit3), col = 'red')
}
)
pred <- data.frame(x=seq(1,24,.01))
with(pred, {
plot(c(1,24), c(9,11), xlab = 'month', ylab = 'unemployment', type = 'n')
lines(x, predict(fit, newdata = pred))
})
AIC(fit,fit3)
library(effects)
allEffects(fit)
dd$month <- dd$x %% 12
fit <- lm(y ~ sp3(x) + per3(month), dd)
summary(fit)
plot(allEffects(fit))
#
# Test names
#
sp <- gspline(c(0,1,2,3), 2, c(-1,0,1,2))
sp(0:4)
knots(sp)
(evs <- expand.grid(k=knots(sp), D = 1:3))
with(evs, sp(k,D,limit = 0))
evs$salt <- apply(with(evs, abs(sp(k, D, limit = 0))), 1, sum) > 1e-14
evs
dd <- expand.grid(x = seq(-1, 4, .1))
dd <- within(dd, y <- x^2 + .1* rnorm(x) + (x > 0))
library(latticeExtra)
library(spida2)
library(gnew)
xyplot(y ~ x , dd)
fit <- lm(y ~ sp(x),dd)
dd$fit <- predict(fit)
trellis.focus()
panel.xyplot(dd$x, dd$fit, type = 'l')
trellis.unfocus()
summ(fit)
evs <- expand.grid(limit = -1:1, x=-1:4, D = 0:2)
with(evs, sp(x,D,limit))
evs$intercept <- with(evs, 1*(D==0))
Lmat <- with(evs, cbind(intercept, sp(x,D, limit)))
knots(sp)
rownames(Lmat) <- with(evs, paste(x,D,limit))
wald(fit, Lmat)
}
#' Synonym for gspline
#'
#' For transitional reasons with the forthcoming
#' gspline package, \code{Gspline} is a synonym for
#' \code{gspline}.
#'
#' @rdname gspline
#' @export
Gspline <- gspline
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.